Introduce hermetic CUDA in Google ML projects.

1) Hermetic CUDA rules allow building wheels with GPU support on a machine without GPUs, and running Bazel GPU tests on a machine that has only GPUs and the NVIDIA driver installed (no locally installed CUDA toolkit is required). When `--config=cuda` is provided in Bazel options, Bazel downloads the CUDA, CUDNN and NCCL redistributions into its cache and uses them during the build and test phases.

    [Default location of CUDNN redistributions](https://developer.download.nvidia.com/compute/cudnn/redist/)

    [Default location of CUDA redistributions](https://developer.download.nvidia.com/compute/cuda/redist/)

    [Default location of NCCL redistributions](https://pypi.org/project/nvidia-nccl-cu12/#history)
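
    For example, with the hermetic rules in place, a CUDA-enabled wheel can be built roughly like this on a GPU-less machine (a sketch: the `--repo_env` version pins mirror the defaults added to `.bazelrc` in this change, and the wheel target name follows the pip-package docs updated below):

    ```bash
    # CUDA and CUDNN redistributions are downloaded by Bazel; no local toolkit is needed.
    bazel build --config=cuda \
        --repo_env=HERMETIC_CUDA_VERSION="12.3.2" \
        --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29" \
        //tensorflow/tools/pip_package:wheel
    ```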

2) To include hermetic CUDA rules in your project, add the following to the WORKSPACE file of the downstream project that depends on XLA.

   Note: use `@local_tsl` instead of `@tsl` in the TensorFlow project.

   ```
   load(
      "@tsl//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
      "cuda_json_init_repository",
   )

   cuda_json_init_repository()

   load(
      "@cuda_redist_json//:distributions.bzl",
      "CUDA_REDISTRIBUTIONS",
      "CUDNN_REDISTRIBUTIONS",
   )
   load(
      "@tsl//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
      "cuda_redist_init_repositories",
      "cudnn_redist_init_repository",
   )

   cuda_redist_init_repositories(
      cuda_redistributions = CUDA_REDISTRIBUTIONS,
   )

   cudnn_redist_init_repository(
      cudnn_redistributions = CUDNN_REDISTRIBUTIONS,
   )

   load(
      "@tsl//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
      "cuda_configure",
   )

   cuda_configure(name = "local_config_cuda")

   load(
      "@tsl//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
      "nccl_redist_init_repository",
   )

   nccl_redist_init_repository()

   load(
      "@tsl//third_party/nccl/hermetic:nccl_configure.bzl",
      "nccl_configure",
   )

   nccl_configure(name = "local_config_nccl")
   ```
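
   With these repository rules in place, GPU tests only need the NVIDIA driver on the host; the hermetic CUDA libraries are pulled in via the `include_hermetic_cuda_libs` flag wired into the `test:cuda` config below (a sketch; `//path/to:gpu_test` is a placeholder target):

   ```bash
   bazel test --config=cuda \
       --@local_config_cuda//cuda:include_hermetic_cuda_libs=true \
       //path/to:gpu_test
   ```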

PiperOrigin-RevId: 662981325
Authored by A. Unique TensorFlower on 2024-08-14 10:57:53 -07:00, committed by TensorFlower Gardener
parent 06e866ffb0
commit 9b5fa66dc6
123 changed files with 7182 additions and 840 deletions


@@ -219,11 +219,16 @@ build:mkl_aarch64_threadpool -c opt
 build:cuda --repo_env TF_NEED_CUDA=1
 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda --@local_config_cuda//:enable_cuda
+# Default CUDA and CUDNN versions.
+build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
+build:cuda --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29"
+# This flag is needed to include hermetic CUDA libraries for bazel tests.
+test:cuda --@local_config_cuda//cuda:include_hermetic_cuda_libs=true

 # CUDA: This config refers to building CUDA op kernels with clang.
 build:cuda_clang --config=cuda
+build:cuda_clang --action_env=TF_CUDA_CLANG="1"
 build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
+build:cuda_clang --copt=-Qunused-arguments
 # Select supported compute capabilities (supported graphics cards).
 # This is the same as the official TensorFlow builds.
 # See https://developer.nvidia.com/cuda-gpus#compute
@@ -232,22 +237,22 @@ build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
 # release while SASS is only forward compatible inside the current
 # major release. Example: sm_80 kernels can run on sm_89 GPUs but
 # not on sm_90 GPUs. compute_80 kernels though can also run on sm_90 GPUs.
-build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
+build:cuda_clang --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
+# Set lld as the linker.
+build:cuda_clang --host_linkopt="-fuse-ld=lld"
+build:cuda_clang --host_linkopt="-lm"
+build:cuda_clang --linkopt="-fuse-ld=lld"
+build:cuda_clang --linkopt="-lm"

 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 build:cuda_clang_official --config=cuda_clang
-build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
-build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
-build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3"
-build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
+build:cuda_clang_official --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
+build:cuda_clang_official --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29"
 build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
-build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 build:cuda_clang_official --crosstool_top="@sigbuild-r2.17-clang_config_cuda//crosstool:toolchain"

 # Build with nvcc for CUDA and clang for host
 build:nvcc_clang --config=cuda
-# Unfortunately, cuda_configure.bzl demands this for using nvcc + clang
-build:nvcc_clang --action_env=TF_CUDA_CLANG="1"
 build:nvcc_clang --action_env=TF_NVCC_CLANG="1"
 build:nvcc_clang --@local_config_cuda//:cuda_compiler=nvcc
@@ -543,9 +548,6 @@ build:rbe_linux_cuda --config=cuda_clang_official
 build:rbe_linux_cuda --config=rbe_linux_cpu
 # For Remote build execution -- GPU configuration
 build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
-build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.17-clang_config_cuda"
-build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.17-clang_config_nccl"
-test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"

 build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
 build:rbe_linux_cuda_nvcc --config=nvcc_clang
@@ -630,7 +632,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/cla
 # Test-related settings below this point.
 test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
 test:release_linux_base --local_test_jobs=HOST_CPUS
-test:release_linux_base --test_env=LD_LIBRARY_PATH
 # Give only the list of failed tests at the end of the log
 test:release_linux_base --test_summary=short
@@ -644,7 +645,6 @@ build:release_gpu_linux --config=release_cpu_linux
 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 # Note that linux cpu and cuda builds share the same toolchain now.
 build:release_gpu_linux --config=cuda_clang_official
-test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 # Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think
 test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute
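
Since `TF_CUDA_COMPUTE_CAPABILITIES` is renamed to `HERMETIC_CUDA_COMPUTE_CAPABILITIES` above, a one-off override now looks roughly like this (a sketch; the capability list is only an example):

```bash
bazel build --config=cuda_clang \
    --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_80,compute_90" \
    //tensorflow/tools/pip_package:wheel
```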


@@ -253,13 +253,21 @@ There are two ways to run TensorFlow unit tests.
 export flags="--config=opt -k"
 ```

-If the tests are to be run on the GPU, add CUDA paths to LD_LIBRARY_PATH and
-add the `cuda` option flag
+If the tests are to be run on the GPU:
+*   For TensorFlow versions starting from v.2.18.0:
+    Add the `cuda` option flag.

-```bash
-export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH"
-export flags="--config=opt --config=cuda -k"
-```
+    ```bash
+    export flags="--config=opt --config=cuda -k"
+    ```
+
+*   For TensorFlow versions prior v.2.18.0:
+    Add CUDA paths to LD_LIBRARY_PATH and add the `cuda` option flag.
+
+    ```bash
+    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH"
+    export flags="--config=opt --config=cuda -k"
+    ```

 For example, to run all tests under tensorflow/python, do:


@@ -21,6 +21,14 @@
 * TensorRT support is disabled in CUDA builds for code health improvement.

+* Hermetic CUDA support is added.
+
+  Hermetic CUDA uses a specific downloadable version of CUDA instead of the
+  user's locally installed CUDA. Bazel will download CUDA, CUDNN and NCCL
+  distributions, and then use CUDA libraries and tools as dependencies in
+  various Bazel targets. This enables more reproducible builds for Google ML
+  projects and supported CUDA versions.
+
 ### Known Caveats

 * <CAVEATS REGARDING THE RELEASE (BUT NOT BREAKING CHANGES).>


@@ -64,3 +64,50 @@ tf_workspace1()
 load("@//tensorflow:workspace0.bzl", "tf_workspace0")

 tf_workspace0()
+
+load(
+    "@local_tsl//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
+    "cuda_json_init_repository",
+)
+
+cuda_json_init_repository()
+
+load(
+    "@cuda_redist_json//:distributions.bzl",
+    "CUDA_REDISTRIBUTIONS",
+    "CUDNN_REDISTRIBUTIONS",
+)
+load(
+    "@local_tsl//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
+    "cuda_redist_init_repositories",
+    "cudnn_redist_init_repository",
+)
+
+cuda_redist_init_repositories(
+    cuda_redistributions = CUDA_REDISTRIBUTIONS,
+)
+
+cudnn_redist_init_repository(
+    cudnn_redistributions = CUDNN_REDISTRIBUTIONS,
+)
+
+load(
+    "@local_tsl//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
+    "cuda_configure",
+)
+
+cuda_configure(name = "local_config_cuda")
+
+load(
+    "@local_tsl//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
+    "nccl_redist_init_repository",
+)
+
+nccl_redist_init_repository()
+
+load(
+    "@local_tsl//third_party/nccl/hermetic:nccl_configure.bzl",
+    "nccl_configure",
+)
+
+nccl_configure(name = "local_config_nccl")


@@ -216,6 +216,8 @@ EOF
 bazel cquery \
   --experimental_cc_shared_library \
   --@local_config_cuda//:enable_cuda \
+  --repo_env=HERMETIC_CUDA_VERSION="12.3.2" \
+  --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29" \
   "somepath(//tensorflow/tools/pip_package:build_pip_package, " \
   "@local_config_cuda//cuda:cudart + "\
   "@local_config_cuda//cuda:cudart + "\
@@ -237,6 +239,8 @@ EOF
 bazel cquery \
   --experimental_cc_shared_library \
   --@local_config_cuda//:enable_cuda \
+  --repo_env=HERMETIC_CUDA_VERSION="12.3.2" \
+  --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29" \
   --define framework_shared_object=false \
   "somepath(//tensorflow/tools/pip_package:build_pip_package, " \
   "@local_config_cuda//cuda:cudart + "\


@@ -216,6 +216,8 @@ EOF
 bazel cquery \
   --experimental_cc_shared_library \
   --@local_config_cuda//:enable_cuda \
+  --repo_env=HERMETIC_CUDA_VERSION="12.3.2" \
+  --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29" \
   "somepath(//tensorflow/tools/pip_package:wheel, " \
   "@local_config_cuda//cuda:cudart + "\
   "@local_config_cuda//cuda:cudart + "\
@@ -237,6 +239,8 @@ EOF
 bazel cquery \
   --experimental_cc_shared_library \
   --@local_config_cuda//:enable_cuda \
+  --repo_env=HERMETIC_CUDA_VERSION="12.3.2" \
+  --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29" \
   --define framework_shared_object=false \
   "somepath(//tensorflow/tools/pip_package:wheel, " \
   "@local_config_cuda//cuda:cudart + "\


@@ -16,7 +16,6 @@

 import argparse
 import errno
-import glob
 import json
 import os
 import platform
@@ -31,8 +30,6 @@ except ImportError:
   from distutils.spawn import find_executable as which
 # pylint: enable=g-import-not-at-top

-_DEFAULT_CUDA_VERSION = '11'
-_DEFAULT_CUDNN_VERSION = '2'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,7.0'

 _SUPPORTED_ANDROID_NDK_VERSIONS = [19, 20, 21, 25]
@@ -127,6 +124,12 @@ def write_action_env_to_bazelrc(var_name, var):
   write_to_bazelrc('build --action_env {}="{}"'.format(var_name, str(var)))


+def write_repo_env_to_bazelrc(config_name, var_name, var):
+  write_to_bazelrc(
+      'build:{} --repo_env {}="{}"'.format(config_name, var_name, str(var))
+  )
+
+
 def run_shell(cmd, allow_non_zero=False, stderr=None):
   if stderr is None:
     stderr = sys.stdout
@@ -238,7 +241,7 @@ def setup_python(environ_cp):
   write_to_bazelrc('build --python_path=\"{}"'.format(python_bin_path))
   environ_cp['PYTHON_BIN_PATH'] = python_bin_path

-  # If choosen python_lib_path is from a path specified in the PYTHONPATH
+  # If chosen python_lib_path is from a path specified in the PYTHONPATH
   # variable, need to tell bazel to include PYTHONPATH
   if environ_cp.get('PYTHONPATH'):
     python_paths = environ_cp.get('PYTHONPATH').split(':')
@@ -777,11 +780,6 @@ def get_ndk_api_level(environ_cp, android_ndk_home_path):
 def set_gcc_host_compiler_path(environ_cp):
   """Set GCC_HOST_COMPILER_PATH."""
   default_gcc_host_compiler_path = which('gcc') or ''
-  cuda_bin_symlink = '%s/bin/gcc' % environ_cp.get('CUDA_TOOLKIT_PATH')
-
-  if os.path.islink(cuda_bin_symlink):
-    # os.readlink is only available in linux
-    default_gcc_host_compiler_path = os.path.realpath(cuda_bin_symlink)

   gcc_host_compiler_path = prompt_loop_or_load_from_env(
       environ_cp,
@@ -946,91 +944,42 @@ def disable_clang_offsetof_extension(clang_version):
     write_to_bazelrc('build --copt=-Wno-gnu-offsetof-extensions')


-def set_tf_cuda_paths(environ_cp):
-  """Set TF_CUDA_PATHS."""
-  ask_cuda_paths = (
-      'Please specify the comma-separated list of base paths to look for CUDA '
-      'libraries and headers. [Leave empty to use the default]: ')
-  tf_cuda_paths = get_from_env_or_user_or_default(environ_cp, 'TF_CUDA_PATHS',
-                                                  ask_cuda_paths, '')
-  if tf_cuda_paths:
-    environ_cp['TF_CUDA_PATHS'] = tf_cuda_paths
-
-
-def set_tf_cuda_version(environ_cp):
-  """Set TF_CUDA_VERSION."""
+def set_hermetic_cuda_version(environ_cp):
+  """Set HERMETIC_CUDA_VERSION."""
   ask_cuda_version = (
-      'Please specify the CUDA SDK version you want to use. '
-      '[Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION
-  tf_cuda_version = get_from_env_or_user_or_default(environ_cp,
-                                                    'TF_CUDA_VERSION',
-                                                    ask_cuda_version,
-                                                    _DEFAULT_CUDA_VERSION)
-  environ_cp['TF_CUDA_VERSION'] = tf_cuda_version
+      'Please specify the hermetic CUDA version you want to use '
+      'or leave empty to use the default version. '
+  )
+  hermetic_cuda_version = get_from_env_or_user_or_default(
+      environ_cp, 'HERMETIC_CUDA_VERSION', ask_cuda_version, None
+  )
+  if hermetic_cuda_version:
+    environ_cp['HERMETIC_CUDA_VERSION'] = hermetic_cuda_version
+    write_repo_env_to_bazelrc(
+        'cuda', 'HERMETIC_CUDA_VERSION', hermetic_cuda_version
+    )


-def set_tf_cudnn_version(environ_cp):
-  """Set TF_CUDNN_VERSION."""
+def set_hermetic_cudnn_version(environ_cp):
+  """Set HERMETIC_CUDNN_VERSION."""
   ask_cudnn_version = (
-      'Please specify the cuDNN version you want to use. '
-      '[Leave empty to default to cuDNN %s]: ') % _DEFAULT_CUDNN_VERSION
-  tf_cudnn_version = get_from_env_or_user_or_default(environ_cp,
-                                                     'TF_CUDNN_VERSION',
-                                                     ask_cudnn_version,
-                                                     _DEFAULT_CUDNN_VERSION)
-  environ_cp['TF_CUDNN_VERSION'] = tf_cudnn_version
+      'Please specify the hermetic cuDNN version you want to use '
+      'or leave empty to use the default version. '
+  )
+  hermetic_cudnn_version = get_from_env_or_user_or_default(
+      environ_cp, 'HERMETIC_CUDNN_VERSION', ask_cudnn_version, None
+  )
+  if hermetic_cudnn_version:
+    environ_cp['HERMETIC_CUDNN_VERSION'] = hermetic_cudnn_version
+    write_repo_env_to_bazelrc(
+        'cuda', 'HERMETIC_CUDNN_VERSION', hermetic_cudnn_version
+    )


-def set_tf_nccl_version(environ_cp):
-  """Set TF_NCCL_VERSION."""
-  if not is_linux():
-    raise ValueError('Currently NCCL is only supported on Linux platform.')
-
-  if 'TF_NCCL_VERSION' in environ_cp:
-    return
-
-  ask_nccl_version = (
-      'Please specify the locally installed NCCL version you want to use. '
-      '[Leave empty to use http://github.com/nvidia/nccl]: ')
-  tf_nccl_version = get_from_env_or_user_or_default(environ_cp,
-                                                    'TF_NCCL_VERSION',
-                                                    ask_nccl_version, '')
-  environ_cp['TF_NCCL_VERSION'] = tf_nccl_version
-
-
-def get_native_cuda_compute_capabilities(environ_cp):
-  """Get native cuda compute capabilities.
-
-  Args:
-    environ_cp: copy of the os.environ.
-
-  Returns:
-    string of native cuda compute capabilities, separated by comma.
-  """
-  device_query_bin = os.path.join(
-      environ_cp.get('CUDA_TOOLKIT_PATH'), 'extras/demo_suite/deviceQuery')
-  if os.path.isfile(device_query_bin) and os.access(device_query_bin, os.X_OK):
-    try:
-      output = run_shell(device_query_bin).split('\n')
-      pattern = re.compile('\d*\\.\d*')
-      output = [pattern.search(x) for x in output if 'Capability' in x]
-      output = ','.join(x.group() for x in output if x is not None)
-    except subprocess.CalledProcessError:
-      output = ''
-  else:
-    output = ''
-
-  return output
-
-
-def set_tf_cuda_compute_capabilities(environ_cp):
-  """Set TF_CUDA_COMPUTE_CAPABILITIES."""
+def set_hermetic_cuda_compute_capabilities(environ_cp):
+  """Set HERMETIC_CUDA_COMPUTE_CAPABILITIES."""
   while True:
-    native_cuda_compute_capabilities = get_native_cuda_compute_capabilities(
-        environ_cp)
-    if not native_cuda_compute_capabilities:
-      default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES
-    else:
-      default_cuda_compute_capabilities = native_cuda_compute_capabilities
+    default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES

     ask_cuda_compute_capabilities = (
         'Please specify a list of comma-separated CUDA compute capabilities '
@@ -1042,15 +991,20 @@ def set_tf_cuda_compute_capabilities(environ_cp):
         'significantly increases your build time and binary size, and that '
         'TensorFlow only supports compute capabilities >= 3.5 [Default is: '
         '%s]: ' % default_cuda_compute_capabilities)
-    tf_cuda_compute_capabilities = get_from_env_or_user_or_default(
-        environ_cp, 'TF_CUDA_COMPUTE_CAPABILITIES',
-        ask_cuda_compute_capabilities, default_cuda_compute_capabilities)
+    hermetic_cuda_compute_capabilities = get_from_env_or_user_or_default(
+        environ_cp,
+        'HERMETIC_CUDA_COMPUTE_CAPABILITIES',
+        ask_cuda_compute_capabilities,
+        default_cuda_compute_capabilities,
+    )

     # Check whether all capabilities from the input is valid
     all_valid = True
     # Remove all whitespace characters before splitting the string
     # that users may insert by accident, as this will result in error
-    tf_cuda_compute_capabilities = ''.join(tf_cuda_compute_capabilities.split())
-    for compute_capability in tf_cuda_compute_capabilities.split(','):
+    hermetic_cuda_compute_capabilities = ''.join(
+        hermetic_cuda_compute_capabilities.split()
+    )
+    for compute_capability in hermetic_cuda_compute_capabilities.split(','):
       m = re.match('[0-9]+.[0-9]+', compute_capability)
       if not m:
         # We now support sm_35,sm_50,sm_60,compute_70.
@@ -1085,13 +1039,30 @@ def set_tf_cuda_compute_capabilities(environ_cp):
       break

     # Reset and Retry
-    environ_cp['TF_CUDA_COMPUTE_CAPABILITIES'] = ''
+    environ_cp['HERMETIC_CUDA_COMPUTE_CAPABILITIES'] = ''

-  # Set TF_CUDA_COMPUTE_CAPABILITIES
-  environ_cp['TF_CUDA_COMPUTE_CAPABILITIES'] = tf_cuda_compute_capabilities
-  write_action_env_to_bazelrc(
-      'TF_CUDA_COMPUTE_CAPABILITIES', tf_cuda_compute_capabilities
-  )
+  # Set HERMETIC_CUDA_COMPUTE_CAPABILITIES
+  environ_cp['HERMETIC_CUDA_COMPUTE_CAPABILITIES'] = (
+      hermetic_cuda_compute_capabilities
+  )
+  write_repo_env_to_bazelrc(
+      'cuda',
+      'HERMETIC_CUDA_COMPUTE_CAPABILITIES',
+      hermetic_cuda_compute_capabilities,
+  )
+
+
+def set_cuda_local_path(environ_cp, dist_name, env_var):
+  ask_path = (
+      'Please specify the local {} path you want to use '
+      'or leave empty to use the default version. '
+  ).format(dist_name)
+  local_path = get_from_env_or_user_or_default(
+      environ_cp, env_var, ask_path, None
+  )
+  if local_path:
+    environ_cp[env_var] = local_path
+    write_repo_env_to_bazelrc('cuda', env_var, local_path)


 def set_other_cuda_vars(environ_cp):
@@ -1209,62 +1180,6 @@ def configure_ios(environ_cp):
     symlink_force(filepath, new_filepath)


-def validate_cuda_config(environ_cp):
-  """Run find_cuda_config.py and return cuda_toolkit_path, or None."""
-
-  def maybe_encode_env(env):
-    """Encodes unicode in env to str on Windows python 2.x."""
-    if not is_windows() or sys.version_info[0] != 2:
-      return env
-    for k, v in env.items():
-      if isinstance(k, unicode):
-        k = k.encode('ascii')
-      if isinstance(v, unicode):
-        v = v.encode('ascii')
-      env[k] = v
-    return env
-
-  cuda_libraries = ['cuda', 'cudnn']
-  if is_linux():
-    if environ_cp.get('TF_NCCL_VERSION', None):
-      cuda_libraries.append('nccl')
-
-  paths = glob.glob('**/third_party/gpus/find_cuda_config.py', recursive=True)
-  if not paths:
-    raise FileNotFoundError(
-        "Can't find 'find_cuda_config.py' script inside working directory")
-  proc = subprocess.Popen(
-      [environ_cp['PYTHON_BIN_PATH'], paths[0]] + cuda_libraries,
-      stdout=subprocess.PIPE,
-      env=maybe_encode_env(environ_cp))
-
-  if proc.wait():
-    # Errors from find_cuda_config.py were sent to stderr.
-    print('Asking for detailed CUDA configuration...\n')
-    return False
-
-  config = dict(
-      tuple(line.decode('ascii').rstrip().split(': ')) for line in proc.stdout)
-
-  print('Found CUDA %s in:' % config['cuda_version'])
-  print(' %s' % config['cuda_library_dir'])
-  print(' %s' % config['cuda_include_dir'])
-  print('Found cuDNN %s in:' % config['cudnn_version'])
-  print(' %s' % config['cudnn_library_dir'])
-  print(' %s' % config['cudnn_include_dir'])
-  if config.get('nccl_version', None):
-    print('Found NCCL %s in:' % config['nccl_version'])
-    print(' %s' % config['nccl_library_dir'])
-    print(' %s' % config['nccl_include_dir'])
-  print('\n')
-
-  environ_cp['CUDA_TOOLKIT_PATH'] = config['cuda_toolkit_path']
-  return True
-
-
 def get_gcc_compiler(environ_cp):
   gcc_env = environ_cp.get('CXX') or environ_cp.get('CC') or which('gcc')
   if gcc_env is not None:
@@ -1363,50 +1278,14 @@ def main():
   else:
     environ_cp['TF_NEED_CUDA'] = str(
         int(get_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)))
-  if (environ_cp.get('TF_NEED_CUDA') == '1' and
-      'TF_CUDA_CONFIG_REPO' not in environ_cp):
-    environ_save = dict(environ_cp)
-    for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
-      if validate_cuda_config(environ_cp):
-        cuda_env_names = [
-            'TF_CUDA_VERSION',
-            'TF_CUBLAS_VERSION',
-            'TF_CUDNN_VERSION',
-            'TF_NCCL_VERSION',
-            'TF_CUDA_PATHS',
-            # Items below are for backwards compatibility when not using
-            # TF_CUDA_PATHS.
-            'CUDA_TOOLKIT_PATH',
-            'CUDNN_INSTALL_PATH',
-            'NCCL_INSTALL_PATH',
-            'NCCL_HDR_PATH',
-        ]
-        # Note: set_action_env_var above already writes to bazelrc.
-        for name in cuda_env_names:
-          if name in environ_cp:
-            write_action_env_to_bazelrc(name, environ_cp[name])
-        break
-
-      # Restore settings changed below if CUDA config could not be validated.
-      environ_cp = dict(environ_save)
-
-      set_tf_cuda_version(environ_cp)
-      set_tf_cudnn_version(environ_cp)
-      if is_linux():
-        set_tf_nccl_version(environ_cp)
-      set_tf_cuda_paths(environ_cp)
-    else:
-      raise UserInputError(
-          'Invalid CUDA setting were provided %d '
-          'times in a row. Assuming to be a scripting mistake.'
-          % _DEFAULT_PROMPT_ASK_ATTEMPTS
-      )
-
-    set_tf_cuda_compute_capabilities(environ_cp)
+  if environ_cp.get('TF_NEED_CUDA') == '1':
+    set_hermetic_cuda_version(environ_cp)
+    set_hermetic_cudnn_version(environ_cp)
+    set_hermetic_cuda_compute_capabilities(environ_cp)
+    set_cuda_local_path(environ_cp, 'CUDA', 'LOCAL_CUDA_PATH')
+    set_cuda_local_path(environ_cp, 'CUDNN', 'LOCAL_CUDNN_PATH')
+    set_cuda_local_path(environ_cp, 'NCCL', 'LOCAL_NCCL_PATH')

   if 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get(
       'LD_LIBRARY_PATH') != '1':
     write_action_env_to_bazelrc('LD_LIBRARY_PATH',
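
The net effect of the configure.py changes is that a non-interactive `./configure` run can be driven entirely by environment variables, which end up as `--repo_env` lines in `.bazelrc` (a sketch; the values are illustrative, and LOCAL_CUDA_PATH/LOCAL_CUDNN_PATH/LOCAL_NCCL_PATH may be left unset to use the downloaded redistributions):

```bash
TF_NEED_CUDA=1 \
HERMETIC_CUDA_VERSION="12.3.2" \
HERMETIC_CUDNN_VERSION="8.9.7.29" \
HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90" \
python3 configure.py
```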


@@ -7,6 +7,10 @@
 """

 load("@bazel_skylib//lib:paths.bzl", "paths")
+load(
+    "@local_xla//xla:lit.bzl",
+    "lit_script_with_xla_gpu_cuda_data_dir",
+)

 # Default values used by the test runner.
 _default_test_file_exts = ["mlir", ".pbtxt", ".td"]
@@ -76,7 +80,8 @@ def glob_lit_tests(
         tags_override = {},
         driver = _default_driver,
         features = [],
-        exec_properties = {}):
+        exec_properties = {},
+        hermetic_cuda_data_dir = None):
     """Creates all plausible Lit tests (and their inputs) under this directory.

     Args:
@@ -94,6 +99,8 @@ def glob_lit_tests(
         and specifying a default driver will abort the tests.
       features: [str], list of extra features to enable.
       exec_properties: a dictionary of properties to pass on.
+      hermetic_cuda_data_dir: string. If set, the tests will be run with a
+        `--xla_gpu_cuda_data_dir` flag set to the hermetic CUDA data directory.
     """

     # Ignore some patterns by default for tests and input data.
@@ -108,12 +115,24 @@ def glob_lit_tests(
     # failure.
     all_tests = []
     for curr_test in tests:
-        all_tests.append(curr_test + ".test")
+        final_test_name = curr_test
+        if hermetic_cuda_data_dir:
+            output_file = "with_xla_gpu_cuda_data_dir_{}".format(curr_test)
+            rule_name = "script_{}".format(output_file)
+            lit_script_with_xla_gpu_cuda_data_dir(
+                rule_name,
+                curr_test,
+                output_file,
+                hermetic_cuda_data_dir,
+            )
+            final_test_name = output_file
+        all_tests.append(final_test_name + ".test")

         # Instantiate this test with updated parameters.
         _run_lit_test(
-            name = curr_test + ".test",
-            data = data + [curr_test] + per_test_extra_data.get(curr_test, []),
+            name = final_test_name + ".test",
+            data = data + [final_test_name] +
+                   per_test_extra_data.get(curr_test, []),
             size = size_override.get(curr_test, default_size),
             tags = default_tags + tags_override.get(curr_test, []),
             driver = driver,


@@ -14,6 +14,7 @@ glob_lit_tests(
         "no_rocm",
     ],
     driver = "//tensorflow/compiler/mlir:run_lit.sh",
+    hermetic_cuda_data_dir = "%S/../../../../../../../../cuda_nvcc",
     test_file_exts = ["mlir"],
 )


@@ -1,4 +1,5 @@
 load("@bazel_skylib//lib:selects.bzl", "selects")
+load("@local_xla//xla/tsl:tsl.bzl", "if_hermetic_cuda_libs")
 load(
     "//tensorflow:tensorflow.bzl",
     "clean_dep",
@@ -140,6 +141,19 @@ filegroup(
     visibility = ["//visibility:private"],
 )

+cc_library(
+    name = "gpu_runtime_hermetic_cuda_deps",
+    visibility = ["//visibility:public"],
+    deps = if_hermetic_cuda_libs([
+        "@local_xla//xla/tsl/cuda:cudart",
+        "@local_xla//xla/tsl/cuda:cublas",
+        "@local_xla//xla/tsl/cuda:cufft",
+        "@local_xla//xla/tsl/cuda:cusolver",
+        "@local_xla//xla/tsl/cuda:cusparse",
+        "@local_xla//xla/tsl/cuda:cudnn",
+    ]),
+)
+
 tf_cuda_library(
     name = "gpu_runtime_impl",
     srcs = [
@@ -159,6 +173,7 @@ tf_cuda_library(
         "@local_xla//xla/stream_executor/cuda:cuda_platform",
         "@local_xla//xla/stream_executor/gpu:gpu_stream",
         "@local_xla//xla/stream_executor/gpu:gpu_cudamallocasync_allocator",
+        ":gpu_runtime_hermetic_cuda_deps",
     ],
     defines = if_linux_x86_64(["TF_PLATFORM_LINUX_X86_64"]),
     features = ["-layering_check"],


@@ -235,6 +235,7 @@ tf_staging/third_party/googleapis/build_rules.bzl:
 tf_staging/third_party/googleapis/googleapis.BUILD:
 tf_staging/third_party/googleapis/repository_rules.bzl:
 tf_staging/third_party/gpus/BUILD:
+tf_staging/third_party/gpus/compiler_common_tools.bzl:
 tf_staging/third_party/gpus/crosstool/BUILD.rocm.tpl:
 tf_staging/third_party/gpus/crosstool/BUILD.sycl.tpl:
 tf_staging/third_party/gpus/crosstool/BUILD.tpl:
@@ -252,6 +253,27 @@ tf_staging/third_party/gpus/cuda/LICENSE:
 tf_staging/third_party/gpus/cuda/build_defs.bzl.tpl:
 tf_staging/third_party/gpus/cuda/cuda_config.h.tpl:
 tf_staging/third_party/gpus/cuda/cuda_config.py.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/BUILD:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_cccl.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_configure.bzl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_cublas.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_cudart.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_cudnn.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_cudnn9.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_cufft.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_cupti.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_curand.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_cusolver.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_cusparse.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_json_init_repository.bzl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_nvcc.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_nvjitlink.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_nvml.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_nvprune.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_nvrtc.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_nvtx.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/hermetic/cuda_redist_init_repositories.bzl:
 tf_staging/third_party/gpus/cuda_configure.bzl:
 tf_staging/third_party/gpus/find_cuda_config.py:
 tf_staging/third_party/gpus/rocm/BUILD.tpl:
@@ -284,6 +306,9 @@ tf_staging/third_party/nccl/archive.BUILD:
 tf_staging/third_party/nccl/archive.patch:
 tf_staging/third_party/nccl/build_defs.bzl.tpl:
 tf_staging/third_party/nccl/generated_names.bzl.tpl:
+tf_staging/third_party/nccl/hermetic/BUILD:
+tf_staging/third_party/nccl/hermetic/cuda_nccl.BUILD.tpl:
+tf_staging/third_party/nccl/hermetic/nccl_configure.bzl:
 tf_staging/third_party/nccl/nccl_configure.bzl:
 tf_staging/third_party/nccl/system.BUILD.tpl:
 tf_staging/third_party/nlohmann_json.BUILD:


@@ -1,5 +1,7 @@
 # Using TensorRT in TensorFlow (TF-TRT)

+Note: Starting from v.2.18.0, TensorFlow doesn't support TensorRT.
+
 This module provides necessary bindings and introduces `TRTEngineOp` operator
 that wraps a subgraph in TensorRT. This module is under active development.


@@ -6,6 +6,7 @@ load("//tensorflow:strict.default.bzl", "py_strict_library", "py_strict_test")
 # Placeholder: load py_proto_library
 load(
     "//tensorflow:tensorflow.bzl",
+    "if_hermetic_cuda_tools",
     "if_not_windows",
     "if_oss",
     "if_xla_available",
@@ -1046,6 +1047,13 @@ tf_python_pybind_extension(
         "python_api_dispatcher.h",
         "//tensorflow/python/lib/core:safe_pyobject_ptr_required_hdrs",
     ],
+    # This data is needed to add hermetic CUDA tools in python runfiles.
+    data = if_hermetic_cuda_tools(
+        [
+            "@cuda_nvcc//:ptxas",
+            "@cuda_nvcc//:nvvm",
+        ],
+    ),
     enable_stub_generation = True,
     pytype_srcs = [
         "_pywrap_python_api_dispatcher.pyi",


@@ -70,6 +70,7 @@ load(
     "tsl_gpu_library",
     _cc_header_only_library = "cc_header_only_library",
     _if_cuda_or_rocm = "if_cuda_or_rocm",
+    _if_hermetic_cuda_tools = "if_hermetic_cuda_tools",
     _if_nccl = "if_nccl",
     _transitive_hdrs = "transitive_hdrs",
 )
@@ -800,7 +801,7 @@ def tf_cc_shared_object(
     testonly = kwargs.pop("testonly", False)

     for name_os, name_os_major, name_os_full in names:
-        # Windows DLLs cant be versioned
+        # Windows DLLs can't be versioned
         if name_os.endswith(".dll"):
             name_os_major = name_os
             name_os_full = name_os
@@ -1075,7 +1076,8 @@ def tf_cc_binary(
             ],
         ),
         tags = tags,
-        data = depset(data + added_data_deps),
+        data = depset(data + added_data_deps).to_list() +
+               tf_binary_additional_srcs(fullversion = True),
         linkopts = linkopts + _rpath_linkopts(name_os),
         visibility = visibility,
         **kwargs
@@ -1568,7 +1570,7 @@ def tf_cc_test(
         ),
         data = data +
                tf_binary_dynamic_kernel_dsos() +
-               tf_binary_additional_srcs(),
+               tf_binary_additional_srcs(fullversion = True),
         exec_properties = tf_exec_properties(kwargs),
         **kwargs
     )
@@ -1733,6 +1735,7 @@ def tf_gpu_only_cc_test(
     tf_gpu_kernel_library(
         name = gpu_lib_name,
         srcs = srcs + tf_binary_additional_srcs(),
+        data = tf_binary_additional_srcs(fullversion = True),
         deps = deps,
         testonly = 1,
         features = features,
@@ -3574,3 +3577,6 @@ def replace_with_portable_tf_lib_when_required(non_portable_tf_deps, use_lib_wit
 def tf_python_framework_friends():
     return ["//tensorflow:__subpackages__"]
+
+def if_hermetic_cuda_tools(if_true, if_false = []):
+    return _if_hermetic_cuda_tools(if_true, if_false)


@@ -69,6 +69,36 @@ def prepare_headers(headers: list[str], srcs_dir: str) -> None:
     srcs_dir: target directory where headers are copied to.
   """
   path_to_exclude = [
+      "cuda_cccl/_virtual_includes",
+      "cuda_cublas/_virtual_includes",
+      "cuda_cudart/_virtual_includes",
+      "cuda_cudnn/_virtual_includes",
+      "cuda_cufft/_virtual_includes",
+      "cuda_cupti/_virtual_includes",
+      "cuda_curand/_virtual_includes",
+      "cuda_cusolver/_virtual_includes",
+      "cuda_cusparse/_virtual_includes",
+      "cuda_nccl/_virtual_includes",
+      "cuda_nvcc/_virtual_includes",
+      "cuda_nvjitlink/_virtual_includes",
+      "cuda_nvml/_virtual_includes",
+      "cuda_nvrtc/_virtual_includes",
+      "cuda_nvtx/_virtual_includes",
+      "external/cuda_cccl",
+      "external/cuda_cublas",
+      "external/cuda_cudart",
+      "external/cuda_cudnn",
+      "external/cuda_cufft",
+      "external/cuda_cupti",
+      "external/cuda_curand",
+      "external/cuda_cusolver",
+      "external/cuda_cusparse",
+      "external/cuda_nccl",
+      "external/cuda_nvcc",
+      "external/cuda_nvjitlink",
+      "external/cuda_nvml",
+      "external/cuda_nvrtc",
+      "external/cuda_nvtx",
       "external/pypi",
       "external/jsoncpp_git/src",
       "local_config_cuda/cuda/_virtual_includes",


@@ -216,6 +216,8 @@ EOF
 bazel cquery \
   --experimental_cc_shared_library \
   --@local_config_cuda//:enable_cuda \
+  --repo_env=HERMETIC_CUDA_VERSION="12.3.2" \
+  --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29" \
   "somepath(//tensorflow/tools/pip_package:build_pip_package, " \
   "@local_config_cuda//cuda:cudart + "\
   "@local_config_cuda//cuda:cudart + "\
@@ -236,6 +238,8 @@ EOF
 bazel cquery \
   --experimental_cc_shared_library \
   --@local_config_cuda//:enable_cuda \
+  --repo_env=HERMETIC_CUDA_VERSION="12.3.2" \
+  --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29" \
   --define framework_shared_object=false \
   "somepath(//tensorflow/tools/pip_package:build_pip_package, " \
   "@local_config_cuda//cuda:cudart + "\


@@ -225,8 +225,8 @@ def initialize_rbe_configs():
     tensorflow_rbe_config(
         name = "ubuntu20.04-clang_manylinux2014-cuda12.3-cudnn8.9",
         compiler = "/usr/lib/llvm-18/bin/clang",
-        cuda_version = "12.3",
-        cudnn_version = "8.9",
+        cuda_version = "12.3.2",
+        cudnn_version = "8.9.7.29",
         os = "ubuntu20.04-manylinux2014-multipython",
         python_versions = ["3.9", "3.10", "3.11", "3.12"],
         sysroot = "/dt9",
@@ -236,8 +236,8 @@ def initialize_rbe_configs():
     tensorflow_rbe_config(
         name = "ubuntu20.04-clang_manylinux2014-cuda12.3-cudnn9.1",
         compiler = "/usr/lib/llvm-18/bin/clang",
-        cuda_version = "12.3",
-        cudnn_version = "9.1",
+        cuda_version = "12.3.2",
+        cudnn_version = "9.1.1",
         os = "ubuntu20.04-manylinux2014-multipython",
         python_versions = ["3.9", "3.10", "3.11", "3.12"],
         sysroot = "/dt9",
@@ -248,8 +248,8 @@ def initialize_rbe_configs():
         name = "ubuntu20.04-gcc9_manylinux2014-cuda12.3-cudnn8.9",
         compiler = "/dt9/usr/bin/gcc",
         compiler_prefix = "/usr/bin",
-        cuda_version = "12.3",
-        cudnn_version = "8.9",
+        cuda_version = "12.3.2",
+        cudnn_version = "8.9.7.29",
         os = "ubuntu20.04-manylinux2014-multipython",
         python_versions = ["3.9", "3.10", "3.11", "3.12"],
         python_install_path = "/usr/local",
@@ -258,8 +258,8 @@ def initialize_rbe_configs():
     tensorflow_rbe_config(
         name = "ubuntu22.04-clang_manylinux2014-cuda12.3-cudnn8.9",
        compiler = "/usr/lib/llvm-18/bin/clang",
-        cuda_version = "12.3",
-        cudnn_version = "8.9",
+        cuda_version = "12.3.2",
+        cudnn_version = "8.9.7.29",
         os = "ubuntu22.04-manylinux2014-multipython",
         python_versions = ["3.9", "3.10", "3.11", "3.12"],
         sysroot = "/dt9",
@@ -270,8 +270,8 @@ def initialize_rbe_configs():
         name = "ubuntu22.04-gcc9_manylinux2014-cuda12.3-cudnn8.9",
         compiler = "/dt9/usr/bin/gcc",
         compiler_prefix = "/usr/bin",
-        cuda_version = "12.3",
-        cudnn_version = "8.9",
+        cuda_version = "12.3.2",
+        cudnn_version = "8.9.7.29",
         os = "ubuntu22.04-manylinux2014-multipython",
         python_versions = ["3.9", "3.10", "3.11", "3.12"],
         python_install_path = "/usr/local",
@@ -479,7 +479,7 @@ def initialize_rbe_configs():
             "TF_CUDNN_VERSION": "8.6",
             "TF_ENABLE_XLA": "1",
             "TF_NEED_CUDA": "1",
-            "TF_NEED_TENSORRT": "1",
+            "TF_NEED_TENSORRT": "0",
             "TF_SYSROOT": "/dt9",
             "TF_TENSORRT_VERSION": "8.4",
         },
@@ -558,7 +558,7 @@ def initialize_rbe_configs():
             "TF_CUDNN_VERSION": "8.6",
             "TF_ENABLE_XLA": "1",
             "TF_NEED_CUDA": "1",
-            "TF_NEED_TENSORRT": "1",
+            "TF_NEED_TENSORRT": "0",
             "TF_SYSROOT": "/dt9",
             "TF_TENSORRT_VERSION": "8.4",
         },
@@ -710,8 +710,8 @@ def initialize_rbe_configs():
             "TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu",
             "TF_CUDA_CLANG": "0",
             "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0",
-            "TF_CUDA_VERSION": "12.3",
-            "TF_CUDNN_VERSION": "8.9",
+            "TF_CUDA_VERSION": "12.3.2",
+            "TF_CUDNN_VERSION": "8.9.7.29",
             "TF_ENABLE_XLA": "1",
             "TF_NEED_CUDA": "1",
             "TF_NEED_TENSORRT": "0",
@@ -749,8 +749,8 @@ def initialize_rbe_configs():
             "TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu",
             "TF_CUDA_CLANG": "1",
             "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0",
-            "TF_CUDA_VERSION": "12.3",
-            "TF_CUDNN_VERSION": "8.9",
+            "TF_CUDA_VERSION": "12.3.2",
+            "TF_CUDNN_VERSION": "8.9.7.29",
             "TF_ENABLE_XLA": "1",
             "TF_NEED_CUDA": "1",
             "TF_NEED_TENSORRT": "0",
@@ -788,8 +788,8 @@ def initialize_rbe_configs():
             "PYTHON_BIN_PATH": "/usr/bin/python3",
             "TF_CUDA_CLANG": "0",
             "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0",
-            "TF_CUDA_VERSION": "12.3",
-            "TF_CUDNN_VERSION": "8.9",
+            "TF_CUDA_VERSION": "12.3.2",
+            "TF_CUDNN_VERSION": "8.9.7.29",
             "TF_ENABLE_XLA": "1",
             "TF_NEED_CUDA": "1",
             "TF_SYSROOT": "/dt9",
@@ -826,8 +826,8 @@ def initialize_rbe_configs():
             "PYTHON_BIN_PATH": "/usr/bin/python3",
             "TF_CUDA_CLANG": "1",
             "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0",
-            "TF_CUDA_VERSION": "12.3",
-            "TF_CUDNN_VERSION": "8.9",
+            "TF_CUDA_VERSION": "12.3.2",
+            "TF_CUDNN_VERSION": "8.9.7.29",
             "TF_ENABLE_XLA": "1",
             "TF_NEED_CUDA": "1",
             "TF_SYSROOT": "/dt9",
@@ -864,8 +864,8 @@ def initialize_rbe_configs():
             "PYTHON_BIN_PATH": "/usr/bin/python3",
             "TF_CUDA_CLANG": "1",
             "TF_CUDA_COMPUTE_CAPABILITIES": "3.5,6.0",
-            "TF_CUDA_VERSION": "12.3",
-            "TF_CUDNN_VERSION": "9.1",
+            "TF_CUDA_VERSION": "12.3.2",
+            "TF_CUDNN_VERSION": "9.1.1",
             "TF_ENABLE_XLA": "1",
             "TF_NEED_CUDA": "1",
             "TF_SYSROOT": "/dt9",


@@ -1,9 +1,9 @@
 """Macro that creates external repositories for remote config."""

 load("//tensorflow/tools/toolchains/remote_config:containers.bzl", "containers")
-load("//third_party/gpus:cuda_configure.bzl", "remote_cuda_configure")
 load("//third_party/gpus:rocm_configure.bzl", "remote_rocm_configure")
-load("//third_party/nccl:nccl_configure.bzl", "remote_nccl_configure")
+load("//third_party/gpus/cuda/hermetic:cuda_configure.bzl", "cuda_configure")
+load("//third_party/nccl/hermetic:nccl_configure.bzl", "nccl_configure")
 load("//third_party/py:python_configure.bzl", "local_python_configure", "remote_python_configure")
 load("//third_party/remote_config:remote_platform_configure.bzl", "remote_platform_configure")
 load("//third_party/tensorrt:tensorrt_configure.bzl", "remote_tensorrt_configure")
@@ -51,20 +51,26 @@ def _tensorflow_rbe_config(name, compiler, python_versions, os, rocm_version = N
         "TF_SYSROOT": sysroot if sysroot else "",
     })

-    container_name = "cuda%s-cudnn%s-%s" % (cuda_version, cudnn_version, os)
+    cuda_version_in_container = ".".join(cuda_version.split(".")[:2])
+    cudnn_version_in_container = ".".join(cudnn_version.split(".")[:2])
+    container_name = "cuda%s-cudnn%s-%s" % (
+        cuda_version_in_container,
+        cudnn_version_in_container,
+        os,
+    )
     container_image = _container_image_uri(container_name)
     exec_properties = {
         "container-image": container_image,
         "Pool": "default",
     }

-    remote_cuda_configure(
+    cuda_configure(
         name = "%s_config_cuda" % name,
         environ = env,
         exec_properties = exec_properties,
     )

-    remote_nccl_configure(
+    nccl_configure(
         name = "%s_config_nccl" % name,
         environ = env,
         exec_properties = exec_properties,
@@ -175,13 +181,13 @@ def sigbuild_tf_configs(name_container_map, env):
         "Pool": "default",
     }

-    remote_cuda_configure(
+    cuda_configure(
         name = "%s_config_cuda" % name,
         environ = env,
         exec_properties = exec_properties,
     )

-    remote_nccl_configure(
+    nccl_configure(
         name = "%s_config_nccl" % name,
         environ = env,
         exec_properties = exec_properties,

View File

@@ -29,7 +29,6 @@ load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
 load("//third_party/FP16:workspace.bzl", FP16 = "repo")
 load("//third_party/gemmlowp:workspace.bzl", gemmlowp = "repo")
 load("//third_party/git:git_configure.bzl", "git_configure")
-load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
 load("//third_party/gpus:rocm_configure.bzl", "rocm_configure")
 load("//third_party/gpus:sycl_configure.bzl", "sycl_configure")
 load("//third_party/hexagon:workspace.bzl", hexagon_nn = "repo")
@@ -42,7 +41,6 @@ load("//third_party/kissfft:workspace.bzl", kissfft = "repo")
 load("//third_party/libprotobuf_mutator:workspace.bzl", libprotobuf_mutator = "repo")
 load("//third_party/llvm:setup.bzl", "llvm_setup")
 load("//third_party/nasm:workspace.bzl", nasm = "repo")
-load("//third_party/nccl:nccl_configure.bzl", "nccl_configure")
 load("//third_party/opencl_headers:workspace.bzl", opencl_headers = "repo")
 load("//third_party/pasta:workspace.bzl", pasta = "repo")
 load("//third_party/py:python_configure.bzl", "python_configure")
@@ -106,9 +104,7 @@ def _tf_toolchains():
     # Note that we check the minimum bazel version in WORKSPACE.
     clang6_configure(name = "local_config_clang6")
     cc_download_clang_toolchain(name = "local_config_download_clang")
-    cuda_configure(name = "local_config_cuda")
     tensorrt_configure(name = "local_config_tensorrt")
-    nccl_configure(name = "local_config_nccl")
     git_configure(name = "local_config_git")
     syslibs_configure(name = "local_config_syslibs")
     python_configure(name = "local_config_python")


@@ -14,6 +14,9 @@
 # ==============================================================================
 """Verifies that a list of libraries is installed on the system.

+NB: DEPRECATED! This script is a part of the deprecated `cuda_configure` rule.
+Please use `hermetic/cuda_configure` instead.
+
 Takes a list of arguments with every two subsequent arguments being a logical
 tuple of (path, check_soname). The path to the library and either True or False
 to indicate whether to check the soname field on the shared library.


@@ -0,0 +1,174 @@ (new file; all lines below are additions)
"""Common compiler functions. """
load(
"//third_party/remote_config:common.bzl",
"err_out",
"raw_exec",
"realpath",
)
def to_list_of_strings(elements):
"""Convert the list of ["a", "b", "c"] into '"a", "b", "c"'.
This is to be used to put a list of strings into the bzl file templates
so it gets interpreted as list of strings in Starlark.
Args:
elements: list of string elements
Returns:
single string of elements wrapped in quotes separated by a comma."""
quoted_strings = ["\"" + element + "\"" for element in elements]
return ", ".join(quoted_strings)
_INC_DIR_MARKER_BEGIN = "#include <...>"
# OSX add " (framework directory)" at the end of line, strip it.
_OSX_FRAMEWORK_SUFFIX = " (framework directory)"
_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)
# TODO(dzc): Once these functions have been factored out of Bazel's
# cc_configure.bzl, load them from @bazel_tools instead.
def _cxx_inc_convert(path):
"""Convert path returned by cc -E xc++ in a complete path."""
path = path.strip()
if path.endswith(_OSX_FRAMEWORK_SUFFIX):
path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
return path
def _normalize_include_path(repository_ctx, path):
"""Normalizes include paths before writing them to the crosstool.
If path points inside the 'crosstool' folder of the repository, a relative
path is returned.
If path points outside the 'crosstool' folder, an absolute path is returned.
"""
path = str(repository_ctx.path(path))
crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
if path.startswith(crosstool_folder):
# We drop the path to "$REPO/crosstool" and a trailing path separator.
return path[len(crosstool_folder) + 1:]
return path
def _is_compiler_option_supported(repository_ctx, cc, option):
"""Checks that `option` is supported by the C compiler. Doesn't %-escape the option."""
result = repository_ctx.execute([
cc,
option,
"-o",
"/dev/null",
"-c",
str(repository_ctx.path("tools/cpp/empty.cc")),
])
return result.stderr.find(option) == -1
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sys_root):
"""Compute the list of default C or C++ include directories."""
if lang_is_cpp:
lang = "c++"
else:
lang = "c"
sysroot = []
if tf_sys_root:
sysroot += ["--sysroot", tf_sys_root]
result = raw_exec(repository_ctx, [cc, "-E", "-x" + lang, "-", "-v"] +
sysroot)
stderr = err_out(result)
index1 = stderr.find(_INC_DIR_MARKER_BEGIN)
if index1 == -1:
return []
index1 = stderr.find("\n", index1)
if index1 == -1:
return []
index2 = stderr.rfind("\n ")
if index2 == -1 or index2 < index1:
return []
index2 = stderr.find("\n", index2 + 1)
if index2 == -1:
inc_dirs = stderr[index1 + 1:]
else:
inc_dirs = stderr[index1 + 1:index2].strip()
print_resource_dir_supported = _is_compiler_option_supported(
repository_ctx,
cc,
"-print-resource-dir",
)
if print_resource_dir_supported:
resource_dir = repository_ctx.execute(
[cc, "-print-resource-dir"],
).stdout.strip() + "/share"
inc_dirs += "\n" + resource_dir
compiler_includes = [
_normalize_include_path(repository_ctx, _cxx_inc_convert(p))
for p in inc_dirs.split("\n")
]
# The compiler might be on a symlink, e.g. /symlink -> /opt/gcc
# The above keeps only the resolved paths to the default includes (e.g. /opt/gcc/include/c++/11)
# but Bazel might encounter either (usually reported by the compiler)
# especially when a compiler wrapper (e.g. ccache) is used.
# So we need to also include paths where symlinks are not resolved.
# Try to find real path to CC installation to "see through" compiler wrappers
# GCC has the path to g++
index1 = result.stderr.find("COLLECT_GCC=")
if index1 != -1:
index1 = result.stderr.find("=", index1)
index2 = result.stderr.find("\n", index1)
cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname.dirname
else:
# Clang has the directory
index1 = result.stderr.find("InstalledDir: ")
if index1 != -1:
index1 = result.stderr.find(" ", index1)
index2 = result.stderr.find("\n", index1)
cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname
else:
# Fallback to the CC path
cc_topdir = repository_ctx.path(cc).dirname.dirname
# We now have the compiler installation prefix, e.g. /symlink/gcc
# And the resolved installation prefix, e.g. /opt/gcc
cc_topdir_resolved = str(realpath(repository_ctx, cc_topdir)).strip()
cc_topdir = str(cc_topdir).strip()
# If there is (any!) symlink involved we add paths where the unresolved installation prefix is kept.
# e.g. [/opt/gcc/include/c++/11, /opt/gcc/lib/gcc/x86_64-linux-gnu/11/include, /other/path]
# adds [/symlink/include/c++/11, /symlink/lib/gcc/x86_64-linux-gnu/11/include]
if cc_topdir_resolved != cc_topdir:
unresolved_compiler_includes = [
cc_topdir + inc[len(cc_topdir_resolved):]
for inc in compiler_includes
if inc.startswith(cc_topdir_resolved)
]
compiler_includes = compiler_includes + unresolved_compiler_includes
return compiler_includes
def get_cxx_inc_directories(repository_ctx, cc, tf_sys_root):
"""Compute the list of default C and C++ include directories."""
# For some reason `clang -xc` sometimes returns include paths that are
# different from the ones from `clang -xc++`. (Symlink and a dir)
# So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
includes_cpp = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
True,
tf_sys_root,
)
includes_c = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
False,
tf_sys_root,
)
return includes_cpp + [
inc
for inc in includes_c
if inc not in includes_cpp
]
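
These two public helpers are consumed by the hermetic `cuda_configure` rule further below. A minimal sketch of how they fit together (`repository_ctx`, `cc`, and `tf_sysroot` are placeholders supplied by the enclosing repository rule):

```
# Sketch only: mirrors how the hermetic cuda_configure rule fills its
# crosstool template with the detected include directories.
host_compiler_includes = get_cxx_inc_directories(repository_ctx, cc, tf_sysroot)
cuda_defines = {
    "%{cxx_builtin_include_directories}": to_list_of_strings(host_compiler_includes),
}
```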

@@ -2,6 +2,7 @@
# Update cuda_configure.bzl#verify_build_defines when adding new variables.
load(":cc_toolchain_config.bzl", "cc_toolchain_config")
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
licenses(["restricted"])
@@ -133,9 +134,17 @@ filegroup(
srcs = [],
)
filegroup(
name = "cuda_nvcc_files",
srcs = %{cuda_nvcc_files},
)
filegroup(
name = "crosstool_wrapper_driver_is_not_gcc",
srcs = [
":cuda_nvcc_files",
":clang/bin/crosstool_wrapper_driver_is_not_gcc"
],
)
filegroup(

@@ -1,6 +1,10 @@
# NB: DEPRECATED! This file is a part of the deprecated `cuda_configure` rule.
# Please use `hermetic/cuda_configure` instead.
load(":build_defs.bzl", "cuda_header_library") load(":build_defs.bzl", "cuda_header_library")
load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
load("@bazel_skylib//lib:selects.bzl", "selects") load("@bazel_skylib//lib:selects.bzl", "selects")
load("@bazel_skylib//rules:common_settings.bzl", "bool_flag", "bool_setting")
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
@@ -144,7 +148,6 @@ cc_library(
name = "cusolver",
srcs = ["cuda/lib/%{cusolver_lib}"],
data = ["cuda/lib/%{cusolver_lib}"],
linkopts = ["-lgomp"],
linkstatic = 1,
)
@@ -220,7 +223,6 @@ cc_library(
name = "cusparse",
srcs = ["cuda/lib/%{cusparse_lib}"],
data = ["cuda/lib/%{cusparse_lib}"],
linkopts = ["-lgomp"],
linkstatic = 1,
)
@@ -242,6 +244,41 @@ py_library(
srcs = ["cuda/cuda_config.py"],
)
# Build setting that is always true (i.e. it can not be changed on the
# command line). It is used to create the config settings below that are
# always or never satisfied.
bool_setting(
name = "true_setting",
visibility = ["//visibility:private"],
build_setting_default = True,
)
# Config settings for whether TensorFlow is built with hermetic CUDA.
# These configs are never satisfied in this deprecated file.
config_setting(
name = "hermetic_cuda_tools",
flag_values = {":true_setting": "False"},
)
# Flag indicating if we should include hermetic CUDA libs.
bool_flag(
name = "include_hermetic_cuda_libs",
build_setting_default = False,
)
config_setting(
name = "hermetic_cuda_libs",
flag_values = {":true_setting": "False"},
)
selects.config_setting_group(
name = "hermetic_cuda_tools_and_libs",
match_all = [
":hermetic_cuda_libs",
":hermetic_cuda_tools"
],
)
%{copy_rules}
cc_library(
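
Because `:true_setting` can never be changed on the command line, the `hermetic_cuda_tools` and `hermetic_cuda_libs` settings defined above are never satisfied in this deprecated template, so a `select()` on them always falls through to the default branch. A sketch with hypothetical target names:

```
# Hypothetical consumer: with the deprecated (non-hermetic) repository this
# select() always resolves to ":system_cuda_deps".
cc_library(
    name = "gpu_deps",
    deps = select({
        "@local_config_cuda//cuda:hermetic_cuda_tools_and_libs": [":hermetic_cuda_deps"],
        "//conditions:default": [":system_cuda_deps"],
    }),
)
```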

@@ -1,3 +1,7 @@
# NB: DEPRECATED! This file is a part of the deprecated `cuda_configure` rule.
# Hermetic CUDA repository rule doesn't support Windows.
# Please use `hermetic/cuda_configure`.
load(":build_defs.bzl", "cuda_header_library") load(":build_defs.bzl", "cuda_header_library")
load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
load("@bazel_skylib//lib:selects.bzl", "selects") load("@bazel_skylib//lib:selects.bzl", "selects")

third_party/gpus/cuda/hermetic/BUILD (new, empty file)

third_party/gpus/cuda/hermetic/BUILD.tpl (new file)

@@ -0,0 +1,261 @@
load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
load("@bazel_skylib//lib:selects.bzl", "selects")
load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
package(default_visibility = ["//visibility:public"])
# Config setting whether TensorFlow is built with CUDA support using clang.
#
# TODO(b/174244321), DEPRECATED: this target will be removed when all users
# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_clang.
selects.config_setting_group(
name = "using_clang",
match_all = [
"@local_config_cuda//:is_cuda_enabled",
"@local_config_cuda//:is_cuda_compiler_clang",
],
)
# Config setting whether TensorFlow is built with CUDA support using nvcc.
#
# TODO(b/174244321), DEPRECATED: this target will be removed when all users
# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_nvcc.
selects.config_setting_group(
name = "using_nvcc",
match_all = [
"@local_config_cuda//:is_cuda_enabled",
"@local_config_cuda//:is_cuda_compiler_nvcc",
],
)
# Equivalent to using_clang && -c opt.
selects.config_setting_group(
name = "using_clang_opt",
match_all = [
":using_clang",
":_opt",
],
)
config_setting(
name = "_opt",
values = {"compilation_mode": "opt"},
)
# Provides CUDA headers for '#include "third_party/gpus/cuda/include/cuda.h"'
# All clients including TensorFlow should use these directives.
cc_library(
name = "cuda_headers",
hdrs = [
"cuda/cuda_config.h",
],
include_prefix = "third_party/gpus",
includes = [
".", # required to include cuda/cuda/cuda_config.h as cuda/config.h
],
deps = [":cudart_headers",
":cublas_headers",
":cccl_headers",
":nvtx_headers",
":nvcc_headers",
":cusolver_headers",
":cufft_headers",
":cusparse_headers",
":curand_headers",
":cupti_headers",
":nvml_headers"],
)
cc_library(
name = "cudart_static",
srcs = ["@cuda_cudart//:static"],
linkopts = [
"-ldl",
"-lpthread",
%{cudart_static_linkopt}
],
)
alias(
name = "cuda_driver",
actual = "@cuda_cudart//:cuda_driver",
)
alias(
name = "cudart_headers",
actual = "@cuda_cudart//:headers",
)
alias(
name = "cudart",
actual = "@cuda_cudart//:cudart",
)
alias(
name = "nvtx_headers",
actual = "@cuda_nvtx//:headers",
)
alias(
name = "nvml_headers",
actual = "@cuda_nvml//:headers",
)
alias(
name = "nvcc_headers",
actual = "@cuda_nvcc//:headers",
)
alias(
name = "cccl_headers",
actual = "@cuda_cccl//:headers",
)
alias(
name = "cublas_headers",
actual = "@cuda_cublas//:headers",
)
alias(
name = "cusolver_headers",
actual = "@cuda_cusolver//:headers",
)
alias(
name = "cufft_headers",
actual = "@cuda_cufft//:headers",
)
alias(
name = "cusparse_headers",
actual = "@cuda_cusparse//:headers",
)
alias(
name = "curand_headers",
actual = "@cuda_curand//:headers",
)
alias(
name = "cublas",
actual = "@cuda_cublas//:cublas",
)
alias(
name = "cublasLt",
actual = "@cuda_cublas//:cublasLt",
)
alias(
name = "cusolver",
actual = "@cuda_cusolver//:cusolver",
)
alias(
name = "cudnn",
actual = "@cuda_cudnn//:cudnn",
)
alias(
name = "cudnn_header",
actual = "@cuda_cudnn//:headers",
)
alias(
name = "cufft",
actual = "@cuda_cufft//:cufft",
)
alias(
name = "curand",
actual = "@cuda_curand//:curand",
)
cc_library(
name = "cuda",
deps = [
":cublas",
":cublasLt",
":cuda_headers",
":cudart",
":cudnn",
":cufft",
":curand",
],
)
alias(
name = "cub_headers",
actual = ":cuda_headers",
)
alias(
name = "cupti_headers",
actual = "@cuda_cupti//:headers",
)
alias(
name = "cupti_dsos",
actual = "@cuda_cupti//:cupti",
)
alias(
name = "cusparse",
actual = "@cuda_cusparse//:cusparse",
)
alias(
name = "cuda-nvvm",
actual = "@cuda_nvcc//:nvvm",
)
cc_library(
name = "libdevice_root",
data = [":cuda-nvvm"],
)
bzl_library(
name = "build_defs_bzl",
srcs = ["build_defs.bzl"],
deps = [
"@bazel_skylib//lib:selects",
],
)
py_library(
name = "cuda_config_py",
srcs = ["cuda/cuda_config.py"],
)
# Config setting for whether TensorFlow is built with hermetic CUDA.
alias(
name = "hermetic_cuda_tools",
actual = "@local_config_cuda//:is_cuda_enabled",
)
# Flag indicating if we should include hermetic CUDA libs.
bool_flag(
name = "include_hermetic_cuda_libs",
build_setting_default = False,
)
config_setting(
name = "hermetic_cuda_libs",
flag_values = {":include_hermetic_cuda_libs": "True"},
)
selects.config_setting_group(
name = "hermetic_cuda_tools_and_libs",
match_all = [
":hermetic_cuda_libs",
":hermetic_cuda_tools"
],
)
cc_library(
# This is not yet fully supported, but we need the rule
# to make bazel query happy.
name = "nvptxcompiler",
)
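
In this hermetic template, by contrast, `hermetic_cuda_libs` is satisfied whenever CUDA is enabled and `include_hermetic_cuda_libs` is flipped on, so `hermetic_cuda_tools_and_libs` can be used to attach the hermetic runtime libraries to tests. A sketch with hypothetical target names:

```
# Hypothetical test target: pulls the hermetic CUDA libraries in only when
# both CUDA and include_hermetic_cuda_libs are enabled.
cc_test(
    name = "gpu_smoke_test",
    srcs = ["gpu_smoke_test.cc"],
    deps = select({
        "@local_config_cuda//cuda:hermetic_cuda_tools_and_libs": [
            "@local_config_cuda//cuda:cuda",
        ],
        "//conditions:default": [],
    }),
)
```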

@@ -0,0 +1,15 @@
licenses(["restricted"]) # NVIDIA proprietary license
cc_library(
name = "headers",
hdrs = glob([
%{comment}"include/cub/**",
%{comment}"include/cuda/**",
%{comment}"include/nv/**",
%{comment}"include/thrust/**",
]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,521 @@
"""Repository rule for hermetic CUDA autoconfiguration.
`cuda_configure` depends on the following environment variables:
* `TF_NEED_CUDA`: Whether to enable building with CUDA.
* `TF_NVCC_CLANG`: Whether to use clang for C++ and NVCC for Cuda compilation.
* `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for
both host and device code compilation.
* `TF_SYSROOT`: The sysroot to use when compiling.
* `HERMETIC_CUDA_VERSION`: The version of the CUDA toolkit. If not specified,
the version will be determined by the `TF_CUDA_VERSION`.
* `HERMETIC_CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default
is `3.5,5.2`. If not specified, the value will be determined by the
`TF_CUDA_COMPUTE_CAPABILITIES`.
* `PYTHON_BIN_PATH`: The python binary path
"""
load(
"//third_party/gpus:compiler_common_tools.bzl",
"get_cxx_inc_directories",
"to_list_of_strings",
)
load(
"//third_party/remote_config:common.bzl",
"get_cpu_value",
"get_host_environ",
"which",
)
def _find_cc(repository_ctx):
"""Find the C++ compiler."""
cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
cc_name = "clang"
cc_name_from_env = get_host_environ(repository_ctx, cc_path_envvar)
if cc_name_from_env:
cc_name = cc_name_from_env
if cc_name.startswith("/"):
# Return the absolute path.
return cc_name
cc = which(repository_ctx, cc_name)
if cc == None:
fail(("Cannot find {}, either correct your path or set the {}" +
" environment variable").format(cc_name, cc_path_envvar))
return cc
def _auto_configure_fail(msg):
"""Output failure message when cuda configuration fails."""
red = "\033[0;31m"
no_color = "\033[0m"
fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
def _verify_build_defines(params):
"""Verify all variables that crosstool/BUILD.tpl expects are substituted.
Args:
params: dict of variables that will be passed to the BUILD.tpl template.
"""
missing = []
for param in [
"cxx_builtin_include_directories",
"extra_no_canonical_prefixes_flags",
"host_compiler_path",
"host_compiler_prefix",
"host_compiler_warnings",
"linker_bin_path",
"compiler_deps",
"msvc_cl_path",
"msvc_env_include",
"msvc_env_lib",
"msvc_env_path",
"msvc_env_tmp",
"msvc_lib_path",
"msvc_link_path",
"msvc_ml_path",
"unfiltered_compile_flags",
"win_compiler_deps",
]:
if ("%{" + param + "}") not in params:
missing.append(param)
if missing:
_auto_configure_fail(
"BUILD.tpl template is missing these variables: " +
str(missing) +
".\nWe only got: " +
str(params) +
".",
)
def get_cuda_version(repository_ctx):
return (get_host_environ(repository_ctx, HERMETIC_CUDA_VERSION) or
get_host_environ(repository_ctx, TF_CUDA_VERSION))
def enable_cuda(repository_ctx):
"""Returns whether to build with CUDA support."""
return int(get_host_environ(repository_ctx, TF_NEED_CUDA, False))
def _flag_enabled(repository_ctx, flag_name):
return get_host_environ(repository_ctx, flag_name) == "1"
def _use_nvcc_and_clang(repository_ctx):
# Returns the flag if we need to use clang for C++ and NVCC for Cuda.
return _flag_enabled(repository_ctx, _TF_NVCC_CLANG)
def _tf_sysroot(repository_ctx):
return get_host_environ(repository_ctx, _TF_SYSROOT, "")
def _py_tmpl_dict(d):
return {"%{cuda_config}": str(d)}
def _cudart_static_linkopt(cpu_value):
"""Returns additional platform-specific linkopts for cudart."""
return "\"\"," if cpu_value == "Darwin" else "\"-lrt\","
def _compute_capabilities(repository_ctx):
"""Returns a list of strings representing cuda compute capabilities.
Args:
repository_ctx: the repo rule's context.
Returns:
list of cuda architectures to compile for. 'compute_xy' refers to
both PTX and SASS, 'sm_xy' refers to SASS only.
"""
capabilities = (get_host_environ(
repository_ctx,
_HERMETIC_CUDA_COMPUTE_CAPABILITIES,
) or
get_host_environ(
repository_ctx,
_TF_CUDA_COMPUTE_CAPABILITIES,
))
capabilities = (capabilities or "compute_35,compute_52").split(",")
# Map old 'x.y' capabilities to 'compute_xy'.
if len(capabilities) > 0 and all([len(x.split(".")) == 2 for x in capabilities]):
# If all capabilities are in 'x.y' format, only include PTX for the
# highest capability.
cc_list = sorted([x.replace(".", "") for x in capabilities])
capabilities = [
"sm_%s" % x
for x in cc_list[:-1]
] + ["compute_%s" % cc_list[-1]]
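# e.g. "7.0,8.0" -> ["sm_70", "compute_80"]: SASS only for the lower
# capabilities, PTX (and SASS) for the highest one.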
for i, capability in enumerate(capabilities):
parts = capability.split(".")
if len(parts) != 2:
continue
capabilities[i] = "compute_%s%s" % (parts[0], parts[1])
# Make list unique
capabilities = dict(zip(capabilities, capabilities)).keys()
# Validate capabilities.
for capability in capabilities:
if not capability.startswith(("compute_", "sm_")):
_auto_configure_fail("Invalid compute capability: %s" % capability)
for prefix in ["compute_", "sm_"]:
if not capability.startswith(prefix):
continue
if len(capability) == len(prefix) + 2 and capability[-2:].isdigit():
continue
if len(capability) == len(prefix) + 3 and capability.endswith("90a"):
continue
_auto_configure_fail("Invalid compute capability: %s" % capability)
return capabilities
def _compute_cuda_extra_copts(compute_capabilities):
copts = ["--no-cuda-include-ptx=all"]
for capability in compute_capabilities:
if capability.startswith("compute_"):
capability = capability.replace("compute_", "sm_")
copts.append("--cuda-include-ptx=%s" % capability)
copts.append("--cuda-gpu-arch=%s" % capability)
return str(copts)
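# For example, _compute_cuda_extra_copts(["sm_70", "compute_80"]) returns the
# string '["--no-cuda-include-ptx=all", "--cuda-gpu-arch=sm_70",
# "--cuda-include-ptx=sm_80", "--cuda-gpu-arch=sm_80"]'.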
def _get_cuda_config(repository_ctx):
"""Detects and returns information about the CUDA installation on the system.
Args:
repository_ctx: The repository context.
Returns:
A struct containing the following fields:
cuda_version: The version of CUDA on the system.
cudart_version: The CUDA runtime version on the system.
cupti_version, cublas_version, cusolver_version, curand_version,
cufft_version, cusparse_version: the versions of the corresponding
CUDA redistributions on the system.
cudnn_version: The version of cuDNN on the system.
compute_capabilities: A list of the system's CUDA compute capabilities.
cpu_value: The name of the host operating system.
"""
return struct(
cuda_version = get_cuda_version(repository_ctx),
cupti_version = repository_ctx.read(repository_ctx.attr.cupti_version),
cudart_version = repository_ctx.read(repository_ctx.attr.cudart_version),
cublas_version = repository_ctx.read(repository_ctx.attr.cublas_version),
cusolver_version = repository_ctx.read(repository_ctx.attr.cusolver_version),
curand_version = repository_ctx.read(repository_ctx.attr.curand_version),
cufft_version = repository_ctx.read(repository_ctx.attr.cufft_version),
cusparse_version = repository_ctx.read(repository_ctx.attr.cusparse_version),
cudnn_version = repository_ctx.read(repository_ctx.attr.cudnn_version),
compute_capabilities = _compute_capabilities(repository_ctx),
cpu_value = get_cpu_value(repository_ctx),
)
_DUMMY_CROSSTOOL_BZL_FILE = """
def error_gpu_disabled():
fail("ERROR: Building with --config=cuda but TensorFlow is not configured " +
"to build with GPU support. Please re-run ./configure and enter 'Y' " +
"at the prompt to build with GPU support.")
native.genrule(
name = "error_gen_crosstool",
outs = ["CROSSTOOL"],
cmd = "echo 'Should not be run.' && exit 1",
)
native.filegroup(
name = "crosstool",
srcs = [":CROSSTOOL"],
output_licenses = ["unencumbered"],
)
"""
_DUMMY_CROSSTOOL_BUILD_FILE = """
load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
error_gpu_disabled()
"""
def _create_dummy_repository(repository_ctx):
cpu_value = get_cpu_value(repository_ctx)
# Set up BUILD file for cuda/.
repository_ctx.template(
"cuda/build_defs.bzl",
repository_ctx.attr.build_defs_tpl,
{
"%{cuda_is_configured}": "False",
"%{cuda_extra_copts}": "[]",
"%{cuda_gpu_architectures}": "[]",
"%{cuda_version}": "0.0",
},
)
repository_ctx.template(
"cuda/BUILD",
repository_ctx.attr.cuda_build_tpl,
{
"%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value),
},
)
# Set up cuda_config.h, which is used by
# tensorflow/compiler/xla/stream_executor/dso_loader.cc.
repository_ctx.template(
"cuda/cuda/cuda_config.h",
repository_ctx.attr.cuda_config_tpl,
{
"%{cuda_version}": "",
"%{cudart_version}": "",
"%{cupti_version}": "",
"%{cublas_version}": "",
"%{cusolver_version}": "",
"%{curand_version}": "",
"%{cufft_version}": "",
"%{cusparse_version}": "",
"%{cudnn_version}": "",
"%{cuda_toolkit_path}": "",
"%{cuda_compute_capabilities}": "",
},
)
# Set up cuda_config.py, which is used by gen_build_info to provide
# static build environment info to the API
repository_ctx.template(
"cuda/cuda/cuda_config.py",
repository_ctx.attr.cuda_config_py_tpl,
_py_tmpl_dict({}),
)
# If cuda_configure is not configured to build with GPU support, and the user
# attempts to build with --config=cuda, add a dummy build rule to intercept
# this and fail with an actionable error message.
repository_ctx.file(
"crosstool/error_gpu_disabled.bzl",
_DUMMY_CROSSTOOL_BZL_FILE,
)
repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
def _create_local_cuda_repository(repository_ctx):
"""Creates the repository containing files set up to build with CUDA."""
cuda_config = _get_cuda_config(repository_ctx)
# Set up BUILD file for cuda/
repository_ctx.template(
"cuda/build_defs.bzl",
repository_ctx.attr.build_defs_tpl,
{
"%{cuda_is_configured}": "True",
"%{cuda_extra_copts}": _compute_cuda_extra_copts(
cuda_config.compute_capabilities,
),
"%{cuda_gpu_architectures}": str(cuda_config.compute_capabilities),
"%{cuda_version}": cuda_config.cuda_version,
},
)
repository_ctx.template(
"cuda/BUILD",
repository_ctx.attr.cuda_build_tpl,
{
"%{cudart_static_linkopt}": _cudart_static_linkopt(
cuda_config.cpu_value,
),
},
)
is_nvcc_and_clang = _use_nvcc_and_clang(repository_ctx)
tf_sysroot = _tf_sysroot(repository_ctx)
# Set up crosstool/
cc = _find_cc(repository_ctx)
host_compiler_includes = get_cxx_inc_directories(
repository_ctx,
cc,
tf_sysroot,
)
cuda_defines = {}
# We do not support hermetic CUDA on Windows.
# This ensures the CROSSTOOL file parser is happy.
cuda_defines.update({
"%{msvc_env_tmp}": "msvc_not_used",
"%{msvc_env_path}": "msvc_not_used",
"%{msvc_env_include}": "msvc_not_used",
"%{msvc_env_lib}": "msvc_not_used",
"%{msvc_cl_path}": "msvc_not_used",
"%{msvc_ml_path}": "msvc_not_used",
"%{msvc_link_path}": "msvc_not_used",
"%{msvc_lib_path}": "msvc_not_used",
"%{win_compiler_deps}": ":empty",
})
cuda_defines["%{builtin_sysroot}"] = tf_sysroot
cuda_defines["%{cuda_toolkit_path}"] = repository_ctx.attr.nvcc_binary.workspace_root
cuda_defines["%{compiler}"] = "clang"
cuda_defines["%{host_compiler_prefix}"] = "/usr/bin"
cuda_defines["%{linker_bin_path}"] = ""
cuda_defines["%{extra_no_canonical_prefixes_flags}"] = ""
cuda_defines["%{unfiltered_compile_flags}"] = ""
cuda_defines["%{cxx_builtin_include_directories}"] = to_list_of_strings(
host_compiler_includes,
)
cuda_defines["%{cuda_nvcc_files}"] = "if_cuda([\"@{nvcc_archive}//:bin\", \"@{nvcc_archive}//:nvvm\"])".format(
nvcc_archive = repository_ctx.attr.nvcc_binary.repo_name,
)
if not is_nvcc_and_clang:
cuda_defines["%{host_compiler_path}"] = str(cc)
cuda_defines["%{host_compiler_warnings}"] = """
# Some parts of the codebase set -Werror and hit this warning, so
# switch it off for now.
"-Wno-invalid-partial-specialization"
"""
cuda_defines["%{compiler_deps}"] = ":cuda_nvcc_files"
repository_ctx.file(
"crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
"",
)
else:
cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
cuda_defines["%{host_compiler_warnings}"] = ""
nvcc_relative_path = "%s/%s" % (
repository_ctx.attr.nvcc_binary.workspace_root,
repository_ctx.attr.nvcc_binary.name,
)
cuda_defines["%{compiler_deps}"] = ":crosstool_wrapper_driver_is_not_gcc"
wrapper_defines = {
"%{cpu_compiler}": str(cc),
"%{cuda_version}": cuda_config.cuda_version,
"%{nvcc_path}": nvcc_relative_path,
"%{host_compiler_path}": str(cc),
"%{use_clang_compiler}": "True",
}
repository_ctx.template(
"crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
repository_ctx.attr.crosstool_wrapper_driver_is_not_gcc_tpl,
wrapper_defines,
)
_verify_build_defines(cuda_defines)
# Only expand template variables in the BUILD file
repository_ctx.template(
"crosstool/BUILD",
repository_ctx.attr.crosstool_build_tpl,
cuda_defines,
)
# No templating of cc_toolchain_config - use attributes and templatize the
# BUILD file.
repository_ctx.template(
"crosstool/cc_toolchain_config.bzl",
repository_ctx.attr.cc_toolchain_config_tpl,
{},
)
# Set up cuda_config.h, which is used by
# tensorflow/compiler/xla/stream_executor/dso_loader.cc.
repository_ctx.template(
"cuda/cuda/cuda_config.h",
repository_ctx.attr.cuda_config_tpl,
{
"%{cuda_version}": cuda_config.cuda_version,
"%{cudart_version}": cuda_config.cudart_version,
"%{cupti_version}": cuda_config.cupti_version,
"%{cublas_version}": cuda_config.cublas_version,
"%{cusolver_version}": cuda_config.cusolver_version,
"%{curand_version}": cuda_config.curand_version,
"%{cufft_version}": cuda_config.cufft_version,
"%{cusparse_version}": cuda_config.cusparse_version,
"%{cudnn_version}": cuda_config.cudnn_version,
"%{cuda_toolkit_path}": "",
"%{cuda_compute_capabilities}": ", ".join([
cc.split("_")[1]
for cc in cuda_config.compute_capabilities
]),
},
)
# Set up cuda_config.py, which is used by gen_build_info to provide
# static build environment info to the API
repository_ctx.template(
"cuda/cuda/cuda_config.py",
repository_ctx.attr.cuda_config_py_tpl,
_py_tmpl_dict({
"cuda_version": cuda_config.cuda_version,
"cudnn_version": cuda_config.cudnn_version,
"cuda_compute_capabilities": cuda_config.compute_capabilities,
"cpu_compiler": str(cc),
}),
)
def _cuda_autoconf_impl(repository_ctx):
"""Implementation of the cuda_autoconf repository rule."""
build_file = repository_ctx.attr.local_config_cuda_build_file
if not enable_cuda(repository_ctx):
_create_dummy_repository(repository_ctx)
else:
_create_local_cuda_repository(repository_ctx)
repository_ctx.symlink(build_file, "BUILD")
_CLANG_CUDA_COMPILER_PATH = "CLANG_CUDA_COMPILER_PATH"
_PYTHON_BIN_PATH = "PYTHON_BIN_PATH"
_HERMETIC_CUDA_COMPUTE_CAPABILITIES = "HERMETIC_CUDA_COMPUTE_CAPABILITIES"
_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
HERMETIC_CUDA_VERSION = "HERMETIC_CUDA_VERSION"
TF_CUDA_VERSION = "TF_CUDA_VERSION"
TF_NEED_CUDA = "TF_NEED_CUDA"
_TF_NVCC_CLANG = "TF_NVCC_CLANG"
_TF_SYSROOT = "TF_SYSROOT"
_ENVIRONS = [
_CLANG_CUDA_COMPILER_PATH,
TF_NEED_CUDA,
_TF_NVCC_CLANG,
TF_CUDA_VERSION,
HERMETIC_CUDA_VERSION,
_TF_CUDA_COMPUTE_CAPABILITIES,
_HERMETIC_CUDA_COMPUTE_CAPABILITIES,
_TF_SYSROOT,
_PYTHON_BIN_PATH,
"TMP",
"TMPDIR",
"LOCAL_CUDA_PATH",
"LOCAL_CUDNN_PATH",
]
cuda_configure = repository_rule(
implementation = _cuda_autoconf_impl,
environ = _ENVIRONS,
attrs = {
"environ": attr.string_dict(),
"cublas_version": attr.label(default = Label("@cuda_cublas//:version.txt")),
"cudart_version": attr.label(default = Label("@cuda_cudart//:version.txt")),
"cudnn_version": attr.label(default = Label("@cuda_cudnn//:version.txt")),
"cufft_version": attr.label(default = Label("@cuda_cufft//:version.txt")),
"cupti_version": attr.label(default = Label("@cuda_cupti//:version.txt")),
"curand_version": attr.label(default = Label("@cuda_curand//:version.txt")),
"cusolver_version": attr.label(default = Label("@cuda_cusolver//:version.txt")),
"cusparse_version": attr.label(default = Label("@cuda_cusparse//:version.txt")),
"nvcc_binary": attr.label(default = Label("@cuda_nvcc//:bin/nvcc")),
"local_config_cuda_build_file": attr.label(default = Label("//third_party/gpus:local_config_cuda.BUILD")),
"build_defs_tpl": attr.label(default = Label("//third_party/gpus/cuda:build_defs.bzl.tpl")),
"cuda_build_tpl": attr.label(default = Label("//third_party/gpus/cuda/hermetic:BUILD.tpl")),
"cuda_config_tpl": attr.label(default = Label("//third_party/gpus/cuda:cuda_config.h.tpl")),
"cuda_config_py_tpl": attr.label(default = Label("//third_party/gpus/cuda:cuda_config.py.tpl")),
"crosstool_wrapper_driver_is_not_gcc_tpl": attr.label(default = Label("//third_party/gpus/crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl")),
"crosstool_build_tpl": attr.label(default = Label("//third_party/gpus/crosstool:BUILD.tpl")),
"cc_toolchain_config_tpl": attr.label(default = Label("//third_party/gpus/crosstool:cc_toolchain_config.bzl.tpl")),
},
)
"""Detects and configures the hermetic CUDA toolchain.
Add the following to your WORKSPACE file:
```python
cuda_configure(name = "local_config_cuda")
```
Args:
name: A unique name for this workspace rule.
""" # buildifier: disable=no-effect

@@ -0,0 +1,44 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cublas_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcublas.so.%{libcublas_version}",
deps = [":cublasLt"],
)
cc_import(
name = "cublasLt_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcublasLt.so.%{libcublaslt_version}",
)
%{multiline_comment}
cc_library(
name = "cublas",
visibility = ["//visibility:public"],
%{comment}deps = [":cublas_shared_library"],
)
cc_library(
name = "cublasLt",
visibility = ["//visibility:public"],
%{comment}deps = [":cublasLt_shared_library"],
)
cc_library(
name = "headers",
%{comment}hdrs = [
%{comment}"include/cublas.h",
%{comment}"include/cublasLt.h",
%{comment}"include/cublas_api.h",
%{comment}"include/cublas_v2.h",
%{comment}],
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,126 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
filegroup(
name = "static",
srcs = ["lib/libcudart_static.a"],
visibility = ["@local_config_cuda//cuda:__pkg__"],
)
%{multiline_comment}
# TODO: Replace system provided library with hermetic NVIDIA driver library.
cc_import(
name = "cuda_driver_shared_library",
interface_library = "lib/stubs/libcuda.so",
system_provided = 1,
)
cc_import(
name = "cudart_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcudart.so.%{libcudart_version}",
)
%{multiline_comment}
cc_library(
name = "cuda_driver",
%{comment}deps = [":cuda_driver_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "cudart",
%{comment}deps = [
%{comment}":cuda_driver",
%{comment}":cudart_shared_library",
%{comment}],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/builtin_types.h",
%{comment}"include/channel_descriptor.h",
%{comment}"include/common_functions.h",
%{comment}"include/cooperative_groups/**",
%{comment}"include/cooperative_groups.h",
%{comment}"include/cuComplex.h",
%{comment}"include/cuda.h",
%{comment}"include/cudaEGL.h",
%{comment}"include/cudaEGLTypedefs.h",
%{comment}"include/cudaGL.h",
%{comment}"include/cudaGLTypedefs.h",
%{comment}"include/cudaProfilerTypedefs.h",
%{comment}"include/cudaTypedefs.h",
%{comment}"include/cudaVDPAU.h",
%{comment}"include/cudaVDPAUTypedefs.h",
%{comment}"include/cuda_awbarrier.h",
%{comment}"include/cuda_awbarrier_helpers.h",
%{comment}"include/cuda_awbarrier_primitives.h",
%{comment}"include/cuda_bf16.h",
%{comment}"include/cuda_bf16.hpp",
%{comment}"include/cuda_device_runtime_api.h",
%{comment}"include/cuda_egl_interop.h",
%{comment}"include/cuda_fp16.h",
%{comment}"include/cuda_fp16.hpp",
%{comment}"include/cuda_fp8.h",
%{comment}"include/cuda_fp8.hpp",
%{comment}"include/cuda_gl_interop.h",
%{comment}"include/cuda_occupancy.h",
%{comment}"include/cuda_pipeline.h",
%{comment}"include/cuda_pipeline_helpers.h",
%{comment}"include/cuda_pipeline_primitives.h",
%{comment}"include/cuda_runtime.h",
%{comment}"include/cuda_runtime_api.h",
%{comment}"include/cuda_surface_types.h",
%{comment}"include/cuda_texture_types.h",
%{comment}"include/cuda_vdpau_interop.h",
%{comment}"include/cudart_platform.h",
%{comment}"include/device_atomic_functions.h",
%{comment}"include/device_atomic_functions.hpp",
%{comment}"include/device_double_functions.h",
%{comment}"include/device_functions.h",
%{comment}"include/device_launch_parameters.h",
%{comment}"include/device_types.h",
%{comment}"include/driver_functions.h",
%{comment}"include/driver_types.h",
%{comment}"include/host_config.h",
%{comment}"include/host_defines.h",
%{comment}"include/library_types.h",
%{comment}"include/math_constants.h",
%{comment}"include/math_functions.h",
%{comment}"include/mma.h",
%{comment}"include/nvfunctional",
%{comment}"include/sm_20_atomic_functions.h",
%{comment}"include/sm_20_atomic_functions.hpp",
%{comment}"include/sm_20_intrinsics.h",
%{comment}"include/sm_20_intrinsics.hpp",
%{comment}"include/sm_30_intrinsics.h",
%{comment}"include/sm_30_intrinsics.hpp",
%{comment}"include/sm_32_atomic_functions.h",
%{comment}"include/sm_32_atomic_functions.hpp",
%{comment}"include/sm_32_intrinsics.h",
%{comment}"include/sm_32_intrinsics.hpp",
%{comment}"include/sm_35_atomic_functions.h",
%{comment}"include/sm_35_intrinsics.h",
%{comment}"include/sm_60_atomic_functions.h",
%{comment}"include/sm_60_atomic_functions.hpp",
%{comment}"include/sm_61_intrinsics.h",
%{comment}"include/sm_61_intrinsics.hpp",
%{comment}"include/surface_functions.h",
%{comment}"include/surface_indirect_functions.h",
%{comment}"include/surface_types.h",
%{comment}"include/texture_fetch_functions.h",
%{comment}"include/texture_indirect_functions.h",
%{comment}"include/texture_types.h",
%{comment}"include/vector_functions.h",
%{comment}"include/vector_functions.hpp",
%{comment}"include/vector_types.h",
%{comment}]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,73 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cudnn_ops_infer",
hdrs = [":headers"],
shared_library = "lib/libcudnn_ops_infer.so.%{libcudnn_ops_infer_version}",
)
cc_import(
name = "cudnn_cnn_infer",
hdrs = [":headers"],
shared_library = "lib/libcudnn_cnn_infer.so.%{libcudnn_cnn_infer_version}",
)
cc_import(
name = "cudnn_ops_train",
hdrs = [":headers"],
shared_library = "lib/libcudnn_ops_train.so.%{libcudnn_ops_train_version}",
)
cc_import(
name = "cudnn_cnn_train",
hdrs = [":headers"],
shared_library = "lib/libcudnn_cnn_train.so.%{libcudnn_cnn_train_version}",
)
cc_import(
name = "cudnn_adv_infer",
hdrs = [":headers"],
shared_library = "lib/libcudnn_adv_infer.so.%{libcudnn_adv_infer_version}",
)
cc_import(
name = "cudnn_adv_train",
hdrs = [":headers"],
shared_library = "lib/libcudnn_adv_train.so.%{libcudnn_adv_train_version}",
)
cc_import(
name = "cudnn_main",
hdrs = [":headers"],
shared_library = "lib/libcudnn.so.%{libcudnn_version}",
)
%{multiline_comment}
cc_library(
name = "cudnn",
%{comment}deps = [
%{comment}":cudnn_ops_infer",
%{comment}":cudnn_ops_train",
%{comment}":cudnn_cnn_infer",
%{comment}":cudnn_cnn_train",
%{comment}":cudnn_adv_infer",
%{comment}":cudnn_adv_train",
%{comment}"@cuda_nvrtc//:nvrtc",
%{comment}":cudnn_main",
%{comment}],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/cudnn*.h",
%{comment}]),
include_prefix = "third_party/gpus/cudnn",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,80 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cudnn_ops",
hdrs = [":headers"],
shared_library = "lib/libcudnn_ops.so.%{libcudnn_ops_version}",
)
cc_import(
name = "cudnn_cnn",
hdrs = [":headers"],
shared_library = "lib/libcudnn_cnn.so.%{libcudnn_cnn_version}",
)
cc_import(
name = "cudnn_adv",
hdrs = [":headers"],
shared_library = "lib/libcudnn_adv.so.%{libcudnn_adv_version}",
)
cc_import(
name = "cudnn_graph",
hdrs = [":headers"],
shared_library = "lib/libcudnn_graph.so.%{libcudnn_graph_version}",
)
cc_import(
name = "cudnn_engines_precompiled",
hdrs = [":headers"],
shared_library = "lib/libcudnn_engines_precompiled.so.%{libcudnn_engines_precompiled_version}",
)
cc_import(
name = "cudnn_engines_runtime_compiled",
hdrs = [":headers"],
shared_library = "lib/libcudnn_engines_runtime_compiled.so.%{libcudnn_engines_runtime_compiled_version}",
)
cc_import(
name = "cudnn_heuristic",
hdrs = [":headers"],
shared_library = "lib/libcudnn_heuristic.so.%{libcudnn_heuristic_version}",
)
cc_import(
name = "cudnn_main",
hdrs = [":headers"],
shared_library = "lib/libcudnn.so.%{libcudnn_version}",
)
%{multiline_comment}
cc_library(
name = "cudnn",
%{comment}deps = [
%{comment}":cudnn_engines_precompiled",
%{comment}":cudnn_ops",
%{comment}":cudnn_graph",
%{comment}":cudnn_cnn",
%{comment}":cudnn_adv",
%{comment}":cudnn_engines_runtime_compiled",
%{comment}":cudnn_heuristic",
%{comment}"@cuda_nvrtc//:nvrtc",
%{comment}":cudnn_main",
%{comment}],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/cudnn*.h",
%{comment}]),
include_prefix = "third_party/gpus/cudnn",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,29 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cufft_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcufft.so.%{libcufft_version}",
)
%{multiline_comment}
cc_library(
name = "cufft",
%{comment}deps = [":cufft_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/cudalibxt.h",
%{comment}"include/cufft*.h"
%{comment}]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,59 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cupti_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcupti.so.%{libcupti_version}",
)
%{multiline_comment}
cc_library(
name = "cupti",
%{comment}deps = [":cupti_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/Openacc/**",
%{comment}"include/Openmp/**",
%{comment}"include/cuda_stdint.h",
%{comment}"include/cupti.h",
%{comment}"include/cupti_activity.h",
%{comment}"include/cupti_activity_deprecated.h",
%{comment}"include/cupti_callbacks.h",
%{comment}"include/cupti_checkpoint.h",
%{comment}"include/cupti_driver_cbid.h",
%{comment}"include/cupti_events.h",
%{comment}"include/cupti_metrics.h",
%{comment}"include/cupti_nvtx_cbid.h",
%{comment}"include/cupti_pcsampling.h",
%{comment}"include/cupti_pcsampling_util.h",
%{comment}"include/cupti_profiler_target.h",
%{comment}"include/cupti_result.h",
%{comment}"include/cupti_runtime_cbid.h",
%{comment}"include/cupti_sass_metrics.h",
%{comment}"include/cupti_target.h",
%{comment}"include/cupti_version.h",
%{comment}"include/generated_cudaGL_meta.h",
%{comment}"include/generated_cudaVDPAU_meta.h",
%{comment}"include/generated_cuda_gl_interop_meta.h",
%{comment}"include/generated_cuda_meta.h",
%{comment}"include/generated_cuda_runtime_api_meta.h",
%{comment}"include/generated_cuda_vdpau_interop_meta.h",
%{comment}"include/generated_cudart_removed_meta.h",
%{comment}"include/generated_nvtx_meta.h",
%{comment}"include/nvperf_common.h",
%{comment}"include/nvperf_cuda_host.h",
%{comment}"include/nvperf_host.h",
%{comment}"include/nvperf_target.h",
%{comment}]),
include_prefix = "third_party/gpus/cuda/extras/CUPTI/include",
includes = ["include/"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,26 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "curand_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcurand.so.%{libcurand_version}",
)
%{multiline_comment}
cc_library(
name = "curand",
%{comment}deps = [":curand_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob(["include/curand*.h"]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,34 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cusolver_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcusolver.so.%{libcusolver_version}",
deps = [
"@cuda_nvjitlink//:nvjitlink",
"@cuda_cusparse//:cusparse",
"@cuda_cublas//:cublas",
"@cuda_cublas//:cublasLt",
],
)
%{multiline_comment}
cc_library(
name = "cusolver",
%{comment}deps = [":cusolver_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/cusolver*.h",
%{comment}]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,27 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cusparse_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcusparse.so.%{libcusparse_version}",
deps = ["@cuda_nvjitlink//:nvjitlink"],
)
%{multiline_comment}
cc_library(
name = "cusparse",
%{comment}deps = [":cusparse_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = ["include/cusparse.h"],
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,125 @@
# Copyright 2024 The TensorFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hermetic CUDA redistributions JSON repository initialization. Consult the WORKSPACE on how to use it."""
load("//third_party:repo.bzl", "tf_mirror_urls")
load(
"//third_party/gpus/cuda/hermetic:cuda_redist_versions.bzl",
"CUDA_REDIST_JSON_DICT",
"CUDNN_REDIST_JSON_DICT",
)
def _get_env_var(ctx, name):
return ctx.os.environ.get(name)
def _get_json_file_content(repository_ctx, url_to_sha256, json_file_name):
if len(url_to_sha256) > 1:
(url, sha256) = url_to_sha256
else:
url = url_to_sha256[0]
sha256 = ""
repository_ctx.download(
url = tf_mirror_urls(url),
sha256 = sha256,
output = json_file_name,
)
return repository_ctx.read(repository_ctx.path(json_file_name))
def _cuda_redist_json_impl(repository_ctx):
cuda_version = (_get_env_var(repository_ctx, "HERMETIC_CUDA_VERSION") or
_get_env_var(repository_ctx, "TF_CUDA_VERSION"))
local_cuda_path = _get_env_var(repository_ctx, "LOCAL_CUDA_PATH")
cudnn_version = (_get_env_var(repository_ctx, "HERMETIC_CUDNN_VERSION") or
_get_env_var(repository_ctx, "TF_CUDNN_VERSION"))
local_cudnn_path = _get_env_var(repository_ctx, "LOCAL_CUDNN_PATH")
supported_cuda_versions = repository_ctx.attr.cuda_json_dict.keys()
if (cuda_version and not local_cuda_path and
(cuda_version not in supported_cuda_versions)):
fail(
("The supported CUDA versions are {supported_versions}." +
" Please provide a supported version in HERMETIC_CUDA_VERSION" +
" environment variable or add JSON URL for" +
" CUDA version={version}.")
.format(
supported_versions = supported_cuda_versions,
version = cuda_version,
),
)
supported_cudnn_versions = repository_ctx.attr.cudnn_json_dict.keys()
if cudnn_version and not local_cudnn_path and (cudnn_version not in supported_cudnn_versions):
fail(
("The supported CUDNN versions are {supported_versions}." +
" Please provide a supported version in HERMETIC_CUDNN_VERSION" +
" environment variable or add JSON URL for" +
" CUDNN version={version}.")
.format(
supported_versions = supported_cudnn_versions,
version = cudnn_version,
),
)
cuda_redistributions = "{}"
cudnn_redistributions = "{}"
if cuda_version and not local_cuda_path:
cuda_redistributions = _get_json_file_content(
repository_ctx,
repository_ctx.attr.cuda_json_dict[cuda_version],
"redistrib_cuda_%s.json" % cuda_version,
)
if cudnn_version and not local_cudnn_path:
cudnn_redistributions = _get_json_file_content(
repository_ctx,
repository_ctx.attr.cudnn_json_dict[cudnn_version],
"redistrib_cudnn_%s.json" % cudnn_version,
)
repository_ctx.file(
"distributions.bzl",
"""CUDA_REDISTRIBUTIONS = {cuda_redistributions}
CUDNN_REDISTRIBUTIONS = {cudnn_redistributions}
""".format(
cuda_redistributions = cuda_redistributions,
cudnn_redistributions = cudnn_redistributions,
),
)
repository_ctx.file(
"BUILD",
"",
)
cuda_redist_json = repository_rule(
implementation = _cuda_redist_json_impl,
attrs = {
"cuda_json_dict": attr.string_list_dict(mandatory = True),
"cudnn_json_dict": attr.string_list_dict(mandatory = True),
},
environ = [
"HERMETIC_CUDA_VERSION",
"HERMETIC_CUDNN_VERSION",
"TF_CUDA_VERSION",
"TF_CUDNN_VERSION",
"LOCAL_CUDA_PATH",
"LOCAL_CUDNN_PATH",
],
)
def cuda_json_init_repository(
cuda_json_dict = CUDA_REDIST_JSON_DICT,
cudnn_json_dict = CUDNN_REDIST_JSON_DICT):
cuda_redist_json(
name = "cuda_redist_json",
cuda_json_dict = cuda_json_dict,
cudnn_json_dict = cudnn_json_dict,
)
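
The generated `distributions.bzl` simply inlines the parsed JSON. An abbreviated sketch of its content (entry names and paths follow NVIDIA's redistrib JSON layout; the values here are illustrative):

```
# Illustrative excerpt of @cuda_redist_json//:distributions.bzl.
CUDA_REDISTRIBUTIONS = {
    "cuda_cudart": {
        "linux-x86_64": {
            "relative_path": "cuda_cudart/linux-x86_64/cuda_cudart-linux-x86_64-12.3.101-archive.tar.xz",
            "sha256": "<archive checksum>",
        },
    },
}
CUDNN_REDISTRIBUTIONS = {}
```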

@@ -0,0 +1,75 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"bin/nvcc",
])
filegroup(
name = "nvvm",
srcs = [
"nvvm/libdevice/libdevice.10.bc",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "nvlink",
srcs = [
"bin/nvlink",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "fatbinary",
srcs = [
"bin/fatbinary",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "bin2c",
srcs = [
"bin/bin2c",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "ptxas",
srcs = [
"bin/ptxas",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "bin",
srcs = glob([
"bin/**",
"nvvm/bin/**",
]),
visibility = ["//visibility:public"],
)
filegroup(
name = "link_stub",
srcs = [
"bin/crt/link.stub",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/crt/**",
%{comment}"include/fatbinary_section.h",
%{comment}"include/nvPTXCompiler.h",
%{comment}]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,17 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "nvjitlink_shared_library",
shared_library = "lib/libnvJitLink.so.%{libnvjitlink_version}",
)
%{multiline_comment}
cc_library(
name = "nvjitlink",
%{comment}deps = [":nvjitlink_shared_library"],
visibility = ["//visibility:public"],
)

@@ -0,0 +1,10 @@
licenses(["restricted"]) # NVIDIA proprietary license
cc_library(
name = "headers",
%{comment}hdrs = ["include/nvml.h"],
include_prefix = "third_party/gpus/cuda/nvml/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,9 @@
licenses(["restricted"]) # NVIDIA proprietary license
filegroup(
name = "nvprune",
srcs = [
"bin/nvprune",
],
visibility = ["//visibility:public"],
)

@@ -0,0 +1,20 @@
licenses(["restricted"]) # NVIDIA proprietary license
%{multiline_comment}
cc_import(
name = "nvrtc_main",
shared_library = "lib/libnvrtc.so.%{libnvrtc_version}",
)
cc_import(
name = "nvrtc_builtins",
shared_library = "lib/libnvrtc-builtins.so.%{libnvrtc-builtins_version}",
)
%{multiline_comment}
cc_library(
name = "nvrtc",
%{comment}deps = [
%{comment}":nvrtc_main",
%{comment}":nvrtc_builtins",
%{comment}],
visibility = ["//visibility:public"],
)

@@ -0,0 +1,13 @@
licenses(["restricted"]) # NVIDIA proprietary license
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/nvToolsExt*.h",
%{comment}"include/nvtx3/**",
%{comment}]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

@@ -0,0 +1,491 @@
# Copyright 2024 The TensorFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hermetic CUDA repositories initialization. Consult the WORKSPACE on how to use it."""
load("//third_party:repo.bzl", "tf_mirror_urls")
load(
"//third_party/gpus/cuda/hermetic:cuda_redist_versions.bzl",
"CUDA_REDIST_PATH_PREFIX",
"CUDNN_REDIST_PATH_PREFIX",
"REDIST_VERSIONS_TO_BUILD_TEMPLATES",
)
OS_ARCH_DICT = {
"amd64": "x86_64-unknown-linux-gnu",
"aarch64": "aarch64-unknown-linux-gnu",
}
_REDIST_ARCH_DICT = {
"linux-x86_64": "x86_64-unknown-linux-gnu",
"linux-sbsa": "aarch64-unknown-linux-gnu",
}
SUPPORTED_ARCHIVE_EXTENSIONS = [
".zip",
".jar",
".war",
".aar",
".tar",
".tar.gz",
".tgz",
".tar.xz",
".txz",
".tar.zst",
".tzst",
".tar.bz2",
".tbz",
".ar",
".deb",
".whl",
]
def get_env_var(ctx, name):
return ctx.os.environ.get(name)
def _get_file_name(url):
last_slash_index = url.rfind("/")
return url[last_slash_index + 1:]
def get_archive_name(url):
# buildifier: disable=function-docstring-return
# buildifier: disable=function-docstring-args
"""Returns the archive name without extension."""
filename = _get_file_name(url)
for extension in SUPPORTED_ARCHIVE_EXTENSIONS:
if filename.endswith(extension):
return filename[:-len(extension)]
return filename
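# e.g. ".../cuda_cudart-linux-x86_64-12.3.101-archive.tar.xz" returns
# "cuda_cudart-linux-x86_64-12.3.101-archive".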
LIB_EXTENSION = ".so."
def _get_lib_name_and_version(path):
extension_index = path.rfind(LIB_EXTENSION)
last_slash_index = path.rfind("/")
lib_name = path[last_slash_index + 1:extension_index]
lib_version = path[extension_index + len(LIB_EXTENSION):]
return (lib_name, lib_version)
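# e.g. "lib/libcudart.so.12.3.2" -> ("libcudart", "12.3.2")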
def _get_libraries_by_redist_name_in_dir(repository_ctx):
lib_dir_path = repository_ctx.path("lib")
if not lib_dir_path.exists:
return []
main_lib_name = "lib{}".format(repository_ctx.name.split("_")[1]).lower()
lib_dir_content = lib_dir_path.readdir()
return [
str(f)
for f in lib_dir_content
if (LIB_EXTENSION in str(f) and
main_lib_name in str(f).lower())
]
def get_lib_name_to_version_dict(repository_ctx):
# buildifier: disable=function-docstring-return
# buildifier: disable=function-docstring-args
"""Returns a dict of library names and major versions."""
lib_name_to_version_dict = {}
for path in _get_libraries_by_redist_name_in_dir(repository_ctx):
lib_name, lib_version = _get_lib_name_and_version(path)
key = "%%{%s_version}" % lib_name.lower()
# We need to find either major or major.minor version if there is no
# file with major version. E.g. if we have the following files:
# libcudart.so
# libcudart.so.12
# libcudart.so.12.3.2,
# we will save {"%{libcudart_version}": "12"}.
if len(lib_version.split(".")) == 1:
lib_name_to_version_dict[key] = lib_version
if (len(lib_version.split(".")) == 2 and
key not in lib_name_to_version_dict):
lib_name_to_version_dict[key] = lib_version
return lib_name_to_version_dict
def create_dummy_build_file(repository_ctx, use_comment_symbols = True):
repository_ctx.template(
"BUILD",
repository_ctx.attr.build_templates[0],
{
"%{multiline_comment}": "'''" if use_comment_symbols else "",
"%{comment}": "#" if use_comment_symbols else "",
},
)
def _get_build_template(repository_ctx, major_lib_version):
template = None
for i in range(0, len(repository_ctx.attr.versions)):
for dist_version in repository_ctx.attr.versions[i].split(","):
if dist_version == major_lib_version:
template = repository_ctx.attr.build_templates[i]
break
if not template:
fail("No build template found for {} version {}".format(
repository_ctx.name,
major_lib_version,
))
return template
def get_major_library_version(repository_ctx, lib_name_to_version_dict):
# buildifier: disable=function-docstring-return
# buildifier: disable=function-docstring-args
"""Returns the major library version provided the versions dict."""
major_version = ""
if len(lib_name_to_version_dict) == 0:
return major_version
main_lib_name = "lib{}".format(repository_ctx.name.split("_")[1])
key = "%%{%s_version}" % main_lib_name
major_version = lib_name_to_version_dict[key]
return major_version
def create_build_file(
repository_ctx,
lib_name_to_version_dict,
major_lib_version):
# buildifier: disable=function-docstring-args
"""Creates a BUILD file for the repository."""
if len(major_lib_version) == 0:
build_template_content = repository_ctx.read(
repository_ctx.attr.build_templates[0],
)
if "_version}" not in build_template_content:
create_dummy_build_file(repository_ctx, use_comment_symbols = False)
else:
create_dummy_build_file(repository_ctx)
return
build_template = _get_build_template(
repository_ctx,
major_lib_version.split(".")[0],
)
repository_ctx.template(
"BUILD",
build_template,
lib_name_to_version_dict | {
"%{multiline_comment}": "",
"%{comment}": "",
},
)
def _create_symlinks(repository_ctx, local_path, dirs):
for dir in dirs:
repository_ctx.symlink(
"{path}/{dir}".format(
path = local_path,
dir = dir,
),
dir,
)
def use_local_path(repository_ctx, local_path, dirs):
# buildifier: disable=function-docstring-args
"""Creates repository using local redistribution paths."""
_create_symlinks(
repository_ctx,
local_path,
dirs,
)
lib_name_to_version_dict = get_lib_name_to_version_dict(repository_ctx)
major_version = get_major_library_version(
repository_ctx,
lib_name_to_version_dict,
)
create_build_file(
repository_ctx,
lib_name_to_version_dict,
major_version,
)
repository_ctx.file("version.txt", major_version)
def _use_local_cuda_path(repository_ctx, local_cuda_path):
# buildifier: disable=function-docstring-args
""" Creates symlinks and initializes hermetic CUDA repository."""
use_local_path(
repository_ctx,
local_cuda_path,
["include", "lib", "bin", "nvvm"],
)
def _use_local_cudnn_path(repository_ctx, local_cudnn_path):
# buildifier: disable=function-docstring-args
""" Creates symlinks and initializes hermetic CUDNN repository."""
use_local_path(repository_ctx, local_cudnn_path, ["include", "lib"])
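
Both helpers are driven by `LOCAL_CUDA_PATH` and `LOCAL_CUDNN_PATH`: when set, the repositories symlink the local installation instead of downloading redistributions. A `.bazelrc` sketch (paths are illustrative):

```
# Illustrative only: reuse a locally installed CUDA toolkit and cuDNN.
build:cuda --repo_env=LOCAL_CUDA_PATH="/usr/local/cuda"
build:cuda --repo_env=LOCAL_CUDNN_PATH="/usr/local/cudnn"
```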
def _download_redistribution(repository_ctx, arch_key, path_prefix):
(url, sha256) = repository_ctx.attr.url_dict[arch_key]
# If url is not relative, then appending prefix is not needed.
if not (url.startswith("http") or url.startswith("file:///")):
url = path_prefix + url
archive_name = get_archive_name(url)
file_name = _get_file_name(url)
print("Downloading and extracting {}".format(url)) # buildifier: disable=print
repository_ctx.download(
url = tf_mirror_urls(url),
output = file_name,
sha256 = sha256,
)
if repository_ctx.attr.override_strip_prefix:
strip_prefix = repository_ctx.attr.override_strip_prefix
else:
strip_prefix = archive_name
repository_ctx.extract(
archive = file_name,
stripPrefix = strip_prefix,
)
repository_ctx.delete(file_name)
def _use_downloaded_cuda_redistribution(repository_ctx):
# buildifier: disable=function-docstring-args
""" Downloads CUDA redistribution and initializes hermetic CUDA repository."""
major_version = ""
cuda_version = (get_env_var(repository_ctx, "HERMETIC_CUDA_VERSION") or
get_env_var(repository_ctx, "TF_CUDA_VERSION"))
if not cuda_version:
# If no CUDA version is found, comment out all cc_import targets.
create_dummy_build_file(repository_ctx)
repository_ctx.file("version.txt", major_version)
return
if len(repository_ctx.attr.url_dict) == 0:
print("{} is not found in redistributions list.".format(
repository_ctx.name,
)) # buildifier: disable=print
create_dummy_build_file(repository_ctx)
repository_ctx.file("version.txt", major_version)
return
# Download archive only when GPU config is used.
arch_key = OS_ARCH_DICT[repository_ctx.os.arch]
if arch_key not in repository_ctx.attr.url_dict.keys():
fail(
("The supported platforms are {supported_platforms}." +
" Platform {platform} is not supported for {dist_name}.")
.format(
supported_platforms = repository_ctx.attr.url_dict.keys(),
platform = arch_key,
dist_name = repository_ctx.name,
),
)
_download_redistribution(
repository_ctx,
arch_key,
repository_ctx.attr.cuda_redist_path_prefix,
)
lib_name_to_version_dict = get_lib_name_to_version_dict(repository_ctx)
major_version = get_major_library_version(repository_ctx, lib_name_to_version_dict)
create_build_file(
repository_ctx,
lib_name_to_version_dict,
major_version,
)
repository_ctx.file("version.txt", major_version)
def _cuda_repo_impl(repository_ctx):
local_cuda_path = get_env_var(repository_ctx, "LOCAL_CUDA_PATH")
if local_cuda_path:
_use_local_cuda_path(repository_ctx, local_cuda_path)
else:
_use_downloaded_cuda_redistribution(repository_ctx)
cuda_repo = repository_rule(
implementation = _cuda_repo_impl,
attrs = {
"url_dict": attr.string_list_dict(mandatory = True),
"versions": attr.string_list(mandatory = True),
"build_templates": attr.label_list(mandatory = True),
"override_strip_prefix": attr.string(),
"cuda_redist_path_prefix": attr.string(),
},
environ = [
"HERMETIC_CUDA_VERSION",
"TF_CUDA_VERSION",
"LOCAL_CUDA_PATH",
],
)
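For context, the rule above consults `LOCAL_CUDA_PATH` before falling back to downloads, so a machine with a pre-installed toolkit can reuse it. A minimal sketch of the override (the path is a hypothetical example; it must contain the `include`, `lib`, `bin` and `nvvm` directories that `_use_local_cuda_path` symlinks):

```
# User .bazelrc sketch: reuse a local CUDA installation instead of downloading.
# /usr/local/cuda-12.3 is an assumed example location.
build:cuda --repo_env=LOCAL_CUDA_PATH="/usr/local/cuda-12.3"
```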
def _use_downloaded_cudnn_redistribution(repository_ctx):
# buildifier: disable=function-docstring-args
""" Downloads CUDNN redistribution and initializes hermetic CUDNN repository."""
cudnn_version = None
major_version = ""
cudnn_version = (get_env_var(repository_ctx, "HERMETIC_CUDNN_VERSION") or
get_env_var(repository_ctx, "TF_CUDNN_VERSION"))
cuda_version = (get_env_var(repository_ctx, "HERMETIC_CUDA_VERSION") or
get_env_var(repository_ctx, "TF_CUDA_VERSION"))
if not cudnn_version:
# If no CUDNN version is found, comment out cc_import targets.
create_dummy_build_file(repository_ctx)
repository_ctx.file("version.txt", major_version)
return
if len(repository_ctx.attr.url_dict) == 0:
print("{} is not found in redistributions list.".format(
repository_ctx.name,
)) # buildifier: disable=print
create_dummy_build_file(repository_ctx)
repository_ctx.file("version.txt", major_version)
return
# Download archive only when GPU config is used.
arch_key = OS_ARCH_DICT[repository_ctx.os.arch]
if arch_key not in repository_ctx.attr.url_dict.keys():
arch_key = "cuda{version}_{arch}".format(
version = cuda_version.split(".")[0],
arch = arch_key,
)
if arch_key not in repository_ctx.attr.url_dict.keys():
fail(
("The supported platforms are {supported_platforms}." +
" Platform {platform} is not supported for {dist_name}.")
.format(
supported_platforms = repository_ctx.attr.url_dict.keys(),
platform = arch_key,
dist_name = repository_ctx.name,
),
)
_download_redistribution(
repository_ctx,
arch_key,
repository_ctx.attr.cudnn_redist_path_prefix,
)
lib_name_to_version_dict = get_lib_name_to_version_dict(repository_ctx)
major_version = get_major_library_version(
repository_ctx,
lib_name_to_version_dict,
)
create_build_file(
repository_ctx,
lib_name_to_version_dict,
major_version,
)
repository_ctx.file("version.txt", major_version)
def _cudnn_repo_impl(repository_ctx):
local_cudnn_path = get_env_var(repository_ctx, "LOCAL_CUDNN_PATH")
if local_cudnn_path:
_use_local_cudnn_path(repository_ctx, local_cudnn_path)
else:
_use_downloaded_cudnn_redistribution(repository_ctx)
cudnn_repo = repository_rule(
implementation = _cudnn_repo_impl,
attrs = {
"url_dict": attr.string_list_dict(mandatory = True),
"versions": attr.string_list(mandatory = True),
"build_templates": attr.label_list(mandatory = True),
"override_strip_prefix": attr.string(),
"cudnn_redist_path_prefix": attr.string(),
},
environ = [
"HERMETIC_CUDNN_VERSION",
"TF_CUDNN_VERSION",
"HERMETIC_CUDA_VERSION",
"TF_CUDA_VERSION",
"LOCAL_CUDNN_PATH",
],
)
def _get_redistribution_urls(dist_info):
url_dict = {}
for arch in _REDIST_ARCH_DICT.keys():
if "relative_path" in dist_info[arch]:
url_dict[_REDIST_ARCH_DICT[arch]] = [
dist_info[arch]["relative_path"],
dist_info[arch].get("sha256", ""),
]
continue
if "full_path" in dist_info[arch]:
url_dict[_REDIST_ARCH_DICT[arch]] = [
dist_info[arch]["full_path"],
dist_info[arch].get("sha256", ""),
]
continue
for cuda_version, data in dist_info[arch].items():
# CUDNN JSON might contain paths for each CUDA version.
path_key = "relative_path"
if path_key not in data.keys():
path_key = "full_path"
url_dict["{cuda_version}_{arch}".format(
cuda_version = cuda_version,
arch = _REDIST_ARCH_DICT[arch],
)] = [data[path_key], data.get("sha256", "")]
return url_dict
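The traversal above assumes each per-architecture entry in NVIDIA's redistribution JSON carries either a `relative_path` or a `full_path`, optionally nested per CUDA version for CUDNN. A hedged sketch of the expected shape (keys follow NVIDIA's JSON naming; all values here are invented):

```
# Illustrative dist_info shape only; paths and checksums are made up.
dist_info = {
    "linux-x86_64": {
        "relative_path": "cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.7.29_cuda12-archive.tar.xz",
        "sha256": "0123abcd...",  # optional, defaults to "" via .get()
    },
    # CUDNN JSONs may instead nest entries per CUDA version:
    # "linux-sbsa": {"12": {"relative_path": "...", "sha256": "..."}},
}
```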
def get_version_and_template_lists(version_to_template):
# buildifier: disable=function-docstring-return
# buildifier: disable=function-docstring-args
"""Returns lists of versions and templates provided in the dict."""
template_to_version_map = {}
for version, template in version_to_template.items():
if template not in template_to_version_map.keys():
template_to_version_map[template] = [version]
else:
template_to_version_map[template].append(version)
version_list = []
template_list = []
for template, versions in template_to_version_map.items():
version_list.append(",".join(versions))
template_list.append(Label(template))
return (version_list, template_list)
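A quick illustration of the grouping behavior, using the cuBLAS entry from the version-to-template map later in this commit (Starlark dicts preserve insertion order, so the comma-joined string keeps it too):

```
# Sketch: two major versions sharing one template collapse into a single entry.
versions, templates = get_version_and_template_lists({
    "12": "//third_party/gpus/cuda/hermetic:cuda_cublas.BUILD.tpl",
    "11": "//third_party/gpus/cuda/hermetic:cuda_cublas.BUILD.tpl",
})
# versions == ["12,11"]
# templates == [Label("//third_party/gpus/cuda/hermetic:cuda_cublas.BUILD.tpl")]
```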
def cudnn_redist_init_repository(
cudnn_redistributions,
cudnn_redist_path_prefix = CUDNN_REDIST_PATH_PREFIX,
redist_versions_to_build_templates = REDIST_VERSIONS_TO_BUILD_TEMPLATES):
# buildifier: disable=function-docstring-args
"""Initializes CUDNN repository."""
if "cudnn" in cudnn_redistributions.keys():
url_dict = _get_redistribution_urls(cudnn_redistributions["cudnn"])
else:
url_dict = {}
repo_data = redist_versions_to_build_templates["cudnn"]
versions, templates = get_version_and_template_lists(
repo_data["version_to_template"],
)
cudnn_repo(
name = repo_data["repo_name"],
versions = versions,
build_templates = templates,
url_dict = url_dict,
cudnn_redist_path_prefix = cudnn_redist_path_prefix,
)
def cuda_redist_init_repositories(
cuda_redistributions,
cuda_redist_path_prefix = CUDA_REDIST_PATH_PREFIX,
redist_versions_to_build_templates = REDIST_VERSIONS_TO_BUILD_TEMPLATES):
# buildifier: disable=function-docstring-args
"""Initializes CUDA repositories."""
for redist_name, _ in redist_versions_to_build_templates.items():
if redist_name in ["cudnn", "cuda_nccl"]:
continue
if redist_name in cuda_redistributions.keys():
url_dict = _get_redistribution_urls(cuda_redistributions[redist_name])
else:
url_dict = {}
repo_data = redist_versions_to_build_templates[redist_name]
versions, templates = get_version_and_template_lists(
repo_data["version_to_template"],
)
cuda_repo(
name = repo_data["repo_name"],
versions = versions,
build_templates = templates,
url_dict = url_dict,
cuda_redist_path_prefix = cuda_redist_path_prefix,
)
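Because `_download_redistribution` only prepends `cuda_redist_path_prefix` to relative URLs, pointing the initializer at a mirror is a one-argument change. A sketch, assuming a hypothetical internal mirror that serves the same JSON-relative layout as NVIDIA's site:

```
# WORKSPACE sketch: fetch CUDA redistributions from an assumed internal mirror.
cuda_redist_init_repositories(
    cuda_redistributions = CUDA_REDISTRIBUTIONS,
    cuda_redist_path_prefix = "https://mirror.example.com/cuda/redist/",
)
```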

View File

@@ -0,0 +1,197 @@
# Copyright 2024 The TensorFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hermetic CUDA redistribution versions."""
CUDA_REDIST_PATH_PREFIX = "https://developer.download.nvidia.com/compute/cuda/redist/"
CUDNN_REDIST_PATH_PREFIX = "https://developer.download.nvidia.com/compute/cudnn/redist/"
CUDA_REDIST_JSON_DICT = {
"11.8": [
"https://developer.download.nvidia.com/compute/cuda/redist/redistrib_11.8.0.json",
"941a950a4ab3b95311c50df7b3c8bca973e0cdda76fc2f4b456d2d5e4dac0281",
],
"12.1.1": [
"https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.1.1.json",
"bafea3cb83a4cf5c764eeedcaac0040d0d3c5db3f9a74550da0e7b6ac24d378c",
],
"12.3.1": [
"https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.3.1.json",
"b3cc4181d711cf9b6e3718f323b23813c24f9478119911d7b4bceec9b437dbc3",
],
"12.3.2": [
"https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.3.2.json",
"1b6eacf335dd49803633fed53ef261d62c193e5a56eee5019e7d2f634e39e7ef",
],
}
CUDNN_REDIST_JSON_DICT = {
"8.6": [
"https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_8.6.0.json",
"7f6f50bed4fd8216dc10d6ef505771dc0ecc99cce813993ab405cb507a21d51d",
],
"8.9.6": [
"https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_8.9.6.json",
"6069ef92a2b9bb18cebfbc944964bd2b024b76f2c2c35a43812982e0bc45cf0c",
],
"8.9.7.29": [
"https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_8.9.7.29.json",
"a0734f26f068522464fa09b2f2c186dfbe6ad7407a88ea0c50dd331f0c3389ec",
],
"9.1.1": [
"https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_9.1.1.json",
"d22d569405e5683ff8e563d00d6e8c27e5e6a902c564c23d752b22a8b8b3fe20",
],
}
# The versions are different for x86 and aarch64 architectures because only
# NCCL release versions 2.20.3 and 2.20.5 have the wheels for aarch64.
CUDA_12_NCCL_WHEEL_DICT = {
"x86_64-unknown-linux-gnu": {
"version": "2.21.5",
"url": "https://files.pythonhosted.org/packages/df/99/12cd266d6233f47d00daf3a72739872bdc10267d0383508b0b9c84a18bb6/nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl",
"sha256": "8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0",
},
"aarch64-unknown-linux-gnu": {
"version": "2.20.5",
"url": "https://files.pythonhosted.org/packages/c1/bb/d09dda47c881f9ff504afd6f9ca4f502ded6d8fc2f572cacc5e39da91c28/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl",
"sha256": "1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01",
},
}
CUDA_11_NCCL_WHEEL_DICT = {
"x86_64-unknown-linux-gnu": {
"version": "2.21.5",
"url": "https://files.pythonhosted.org/packages/ac/9a/8b6a28b3b87d5fddab0e92cd835339eb8fbddaa71ae67518c8c1b3d05bae/nvidia_nccl_cu11-2.21.5-py3-none-manylinux2014_x86_64.whl",
"sha256": "49d8350629c7888701d1fd200934942671cb5c728f49acc5a0b3a768820bed29",
},
}
CUDA_NCCL_WHEELS = {
"11.8": CUDA_11_NCCL_WHEEL_DICT,
"12.1.1": CUDA_12_NCCL_WHEEL_DICT,
"12.3.1": CUDA_12_NCCL_WHEEL_DICT,
"12.3.2": CUDA_12_NCCL_WHEEL_DICT,
}
REDIST_VERSIONS_TO_BUILD_TEMPLATES = {
"cuda_nccl": {
"repo_name": "cuda_nccl",
"version_to_template": {
"2": "//third_party/nccl/hermetic:cuda_nccl.BUILD.tpl",
},
},
"cudnn": {
"repo_name": "cuda_cudnn",
"version_to_template": {
"9": "//third_party/gpus/cuda/hermetic:cuda_cudnn9.BUILD.tpl",
"8": "//third_party/gpus/cuda/hermetic:cuda_cudnn.BUILD.tpl",
},
},
"libcublas": {
"repo_name": "cuda_cublas",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_cublas.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_cublas.BUILD.tpl",
},
},
"cuda_cudart": {
"repo_name": "cuda_cudart",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_cudart.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_cudart.BUILD.tpl",
},
},
"libcufft": {
"repo_name": "cuda_cufft",
"version_to_template": {
"11": "//third_party/gpus/cuda/hermetic:cuda_cufft.BUILD.tpl",
"10": "//third_party/gpus/cuda/hermetic:cuda_cufft.BUILD.tpl",
},
},
"cuda_cupti": {
"repo_name": "cuda_cupti",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_cupti.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_cupti.BUILD.tpl",
},
},
"libcurand": {
"repo_name": "cuda_curand",
"version_to_template": {
"10": "//third_party/gpus/cuda/hermetic:cuda_curand.BUILD.tpl",
},
},
"libcusolver": {
"repo_name": "cuda_cusolver",
"version_to_template": {
"11": "//third_party/gpus/cuda/hermetic:cuda_cusolver.BUILD.tpl",
},
},
"libcusparse": {
"repo_name": "cuda_cusparse",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_cusparse.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_cusparse.BUILD.tpl",
},
},
"libnvjitlink": {
"repo_name": "cuda_nvjitlink",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvjitlink.BUILD.tpl",
},
},
"cuda_nvrtc": {
"repo_name": "cuda_nvrtc",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvrtc.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_nvrtc.BUILD.tpl",
},
},
"cuda_cccl": {
"repo_name": "cuda_cccl",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_cccl.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_cccl.BUILD.tpl",
},
},
"cuda_nvcc": {
"repo_name": "cuda_nvcc",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvcc.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_nvcc.BUILD.tpl",
},
},
"cuda_nvml_dev": {
"repo_name": "cuda_nvml",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvml.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_nvml.BUILD.tpl",
},
},
"cuda_nvprune": {
"repo_name": "cuda_nvprune",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvprune.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_nvprune.BUILD.tpl",
},
},
"cuda_nvtx": {
"repo_name": "cuda_nvtx",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvtx.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_nvtx.BUILD.tpl",
},
},
}

View File

@@ -1,5 +1,7 @@
"""Repository rule for CUDA autoconfiguration.
NB: DEPRECATED! Use `hermetic/cuda_configure` rule instead.
`cuda_configure` depends on the following environment variables:
* `TF_NEED_CUDA`: Whether to enable building with CUDA.
@@ -53,6 +55,11 @@ load(
"realpath",
"which",
)
load(
":compiler_common_tools.bzl",
"get_cxx_inc_directories",
"to_list_of_strings",
)
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
_GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX"
@@ -67,20 +74,6 @@ _TF_CUDA_CONFIG_REPO = "TF_CUDA_CONFIG_REPO"
_TF_DOWNLOAD_CLANG = "TF_DOWNLOAD_CLANG"
_PYTHON_BIN_PATH = "PYTHON_BIN_PATH"
def to_list_of_strings(elements):
"""Convert the list of ["a", "b", "c"] into '"a", "b", "c"'.
This is to be used to put a list of strings into the bzl file templates
so it gets interpreted as list of strings in Starlark.
Args:
elements: list of string elements
Returns:
single string of elements wrapped in quotes separated by a comma."""
quoted_strings = ["\"" + element + "\"" for element in elements]
return ", ".join(quoted_strings)
def verify_build_defines(params):
"""Verify all variables that crosstool/BUILD.tpl expects are substituted.
@@ -238,156 +231,6 @@ def find_cc(repository_ctx, use_cuda_clang)
" environment variable").format(target_cc_name, cc_path_envvar))
return cc
_INC_DIR_MARKER_BEGIN = "#include <...>"
# OSX add " (framework directory)" at the end of line, strip it.
_OSX_FRAMEWORK_SUFFIX = " (framework directory)"
_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)
def _cxx_inc_convert(path):
"""Convert path returned by cc -E xc++ in a complete path."""
path = path.strip()
if path.endswith(_OSX_FRAMEWORK_SUFFIX):
path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
return path
def _normalize_include_path(repository_ctx, path):
"""Normalizes include paths before writing them to the crosstool.
If path points inside the 'crosstool' folder of the repository, a relative
path is returned.
If path points outside the 'crosstool' folder, an absolute path is returned.
"""
path = str(repository_ctx.path(path))
crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
if path.startswith(crosstool_folder):
# We drop the path to "$REPO/crosstool" and a trailing path separator.
return path[len(crosstool_folder) + 1:]
return path
def _is_compiler_option_supported(repository_ctx, cc, option):
"""Checks that `option` is supported by the C compiler. Doesn't %-escape the option."""
result = repository_ctx.execute([
cc,
option,
"-o",
"/dev/null",
"-c",
str(repository_ctx.path("tools/cpp/empty.cc")),
])
return result.stderr.find(option) == -1
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot):
"""Compute the list of default C or C++ include directories."""
if lang_is_cpp:
lang = "c++"
else:
lang = "c"
sysroot = []
if tf_sysroot:
sysroot += ["--sysroot", tf_sysroot]
result = raw_exec(repository_ctx, [cc, "-E", "-x" + lang, "-", "-v"] +
sysroot)
stderr = err_out(result)
index1 = stderr.find(_INC_DIR_MARKER_BEGIN)
if index1 == -1:
return []
index1 = stderr.find("\n", index1)
if index1 == -1:
return []
index2 = stderr.rfind("\n ")
if index2 == -1 or index2 < index1:
return []
index2 = stderr.find("\n", index2 + 1)
if index2 == -1:
inc_dirs = stderr[index1 + 1:]
else:
inc_dirs = stderr[index1 + 1:index2].strip()
print_resource_dir_supported = _is_compiler_option_supported(
repository_ctx,
cc,
"-print-resource-dir",
)
if print_resource_dir_supported:
resource_dir = repository_ctx.execute(
[cc, "-print-resource-dir"],
).stdout.strip() + "/share"
inc_dirs += "\n" + resource_dir
compiler_includes = [
_normalize_include_path(repository_ctx, _cxx_inc_convert(p))
for p in inc_dirs.split("\n")
]
# The compiler might be on a symlink, e.g. /symlink -> /opt/gcc
# The above keeps only the resolved paths to the default includes (e.g. /opt/gcc/include/c++/11)
# but Bazel might encounter either (usually reported by the compiler)
# especially when a compiler wrapper (e.g. ccache) is used.
# So we need to also include paths where symlinks are not resolved.
# Try to find real path to CC installation to "see through" compiler wrappers
# GCC has the path to g++
index1 = result.stderr.find("COLLECT_GCC=")
if index1 != -1:
index1 = result.stderr.find("=", index1)
index2 = result.stderr.find("\n", index1)
cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname.dirname
else:
# Clang has the directory
index1 = result.stderr.find("InstalledDir: ")
if index1 != -1:
index1 = result.stderr.find(" ", index1)
index2 = result.stderr.find("\n", index1)
cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname
else:
# Fallback to the CC path
cc_topdir = repository_ctx.path(cc).dirname.dirname
# We now have the compiler installation prefix, e.g. /symlink/gcc
# And the resolved installation prefix, e.g. /opt/gcc
cc_topdir_resolved = str(realpath(repository_ctx, cc_topdir)).strip()
cc_topdir = str(cc_topdir).strip()
# If there is (any!) symlink involved we add paths where the unresolved installation prefix is kept.
# e.g. [/opt/gcc/include/c++/11, /opt/gcc/lib/gcc/x86_64-linux-gnu/11/include, /other/path]
# adds [/symlink/include/c++/11, /symlink/lib/gcc/x86_64-linux-gnu/11/include]
if cc_topdir_resolved != cc_topdir:
unresolved_compiler_includes = [
cc_topdir + inc[len(cc_topdir_resolved):]
for inc in compiler_includes
if inc.startswith(cc_topdir_resolved)
]
compiler_includes = compiler_includes + unresolved_compiler_includes
return compiler_includes
def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot):
"""Compute the list of default C and C++ include directories."""
# For some reason `clang -xc` sometimes returns include paths that are
# different from the ones from `clang -xc++`. (Symlink and a dir)
# So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
includes_cpp = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
True,
tf_sysroot,
)
includes_c = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
False,
tf_sysroot,
)
return includes_cpp + [
inc
for inc in includes_c
if inc not in includes_cpp
]
def auto_configure_fail(msg):
"""Output failure message when cuda configuration fails."""
red = "\033[0;31m"
@@ -1293,6 +1136,7 @@ def _create_local_cuda_repository(repository_ctx):
cuda_defines["%{extra_no_canonical_prefixes_flags}"] = ""
cuda_defines["%{unfiltered_compile_flags}"] = ""
cuda_defines["%{cuda_nvcc_files}"] = "[]"
if is_cuda_clang and not is_nvcc_and_clang:
cuda_defines["%{host_compiler_path}"] = str(cc)
cuda_defines["%{host_compiler_warnings}"] = """

View File

@@ -14,6 +14,9 @@
# ==============================================================================
"""Prints CUDA library and header directories and versions found on the system.
NB: DEPRECATED! This script is a part of the deprecated `cuda_configure` rule.
Please use `hermetic/cuda_configure` instead.
The script searches for CUDA library and header files on the system, inspects
them to determine their version and prints the configuration to stdout.
The paths to inspect and the required versions are specified through environment

View File

@@ -22,12 +22,15 @@ load(
"realpath",
"which",
)
load(
":compiler_common_tools.bzl",
"to_list_of_strings",
)
load(
":cuda_configure.bzl",
"enable_cuda",
"make_copy_dir_rule",
"make_copy_files_rule",
"to_list_of_strings",
)
load(
":sycl_configure.bzl",

View File

@@ -16,11 +16,14 @@ load(
"realpath",
"which",
)
load(
":compiler_common_tools.bzl",
"to_list_of_strings",
)
load(
":cuda_configure.bzl",
"make_copy_dir_rule",
"make_copy_files_rule",
"to_list_of_strings",
)
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"

View File

@@ -5,7 +5,6 @@ load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain")
# CUDA toolkit version as tuple (e.g. '(11, 1)').
_cuda_version = %{cuda_version}
_cuda_clang = %{cuda_clang}
def _rdc_copts():
"""Returns copts for compiling relocatable device code."""
@@ -121,25 +120,25 @@ _device_link = rule(
"gpu_archs": attr.string_list(),
"nvlink_args": attr.string_list(),
"_nvlink": attr.label(
default = Label("@local_config_cuda//cuda:cuda/bin/nvlink"),
default = Label("%{nvlink_label}"),
allow_single_file = True,
executable = True,
cfg = "host",
),
"_fatbinary": attr.label(
default = Label("@local_config_cuda//cuda:cuda/bin/fatbinary"),
default = Label("%{fatbinary_label}"),
allow_single_file = True,
executable = True,
cfg = "host",
),
"_bin2c": attr.label(
default = Label("@local_config_cuda//cuda:cuda/bin/bin2c"),
default = Label("%{bin2c_label}"),
allow_single_file = True,
executable = True,
cfg = "host",
),
"_link_stub": attr.label(
default = Label("@local_config_cuda//cuda:cuda/bin/crt/link.stub"),
default = Label("%{link_stub_label}"),
allow_single_file = True,
),
},
@@ -189,7 +188,7 @@ _prune_relocatable_code = rule(
"input": attr.label(mandatory = True, allow_files = True),
"gpu_archs": attr.string_list(),
"_nvprune": attr.label(
default = Label("@local_config_cuda//cuda:cuda/bin/nvprune"),
default = Label("%{nvprune_label}"),
allow_single_file = True,
executable = True,
cfg = "host",

third_party/nccl/hermetic/BUILD (new, empty file)
View File

View File

@@ -0,0 +1,30 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "nccl_shared_library",
shared_library = "lib/libnccl.so.%{libnccl_version}",
hdrs = [":headers"],
deps = ["@local_config_cuda//cuda:cuda_headers", ":headers"],
)
%{multiline_comment}
cc_library(
name = "nccl",
%{comment}deps = [":nccl_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/nccl*.h",
%{comment}]),
include_prefix = "third_party/nccl",
includes = ["include/"],
strip_include_prefix = "include",
visibility = ["//visibility:public"],
deps = ["@local_config_cuda//cuda:cuda_headers"],
)

View File

@@ -0,0 +1,183 @@
"""Repository rule for hermetic NCCL configuration.
`nccl_configure` depends on the following environment variables:
* `TF_NCCL_USE_STUB`: "1" if a NCCL stub that loads NCCL dynamically should
be used, "0" if NCCL should be linked in statically.
* `HERMETIC_CUDA_VERSION`: The version of the CUDA toolkit. If not specified,
the version is determined by `TF_CUDA_VERSION`.
"""
load(
"//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
"HERMETIC_CUDA_VERSION",
"TF_CUDA_VERSION",
"TF_NEED_CUDA",
"enable_cuda",
"get_cuda_version",
)
load(
"//third_party/remote_config:common.bzl",
"get_cpu_value",
"get_host_environ",
)
_TF_NCCL_USE_STUB = "TF_NCCL_USE_STUB"
_NCCL_DUMMY_BUILD_CONTENT = """
filegroup(
name = "LICENSE",
visibility = ["//visibility:public"],
)
cc_library(
name = "nccl",
visibility = ["//visibility:public"],
)
cc_library(
name = "nccl_config",
hdrs = ["nccl_config.h"],
include_prefix = "third_party/nccl",
visibility = ["//visibility:public"],
)
"""
_NCCL_ARCHIVE_BUILD_CONTENT = """
filegroup(
name = "LICENSE",
data = ["@nccl_archive//:LICENSE.txt"],
visibility = ["//visibility:public"],
)
alias(
name = "nccl",
actual = select({
"@local_config_cuda//cuda:hermetic_cuda_tools_and_libs": "@cuda_nccl//:nccl",
"//conditions:default": "@nccl_archive//:nccl",
}),
visibility = ["//visibility:public"],
)
cc_library(
name = "hermetic_nccl_config",
hdrs = ["nccl_config.h"],
include_prefix = "third_party/nccl",
visibility = ["//visibility:public"],
)
alias(
name = "nccl_config",
actual = select({
"@local_config_cuda//cuda:hermetic_cuda_tools_and_libs": ":hermetic_nccl_config",
"//conditions:default": "@nccl_archive//:nccl_config",
}),
visibility = ["//visibility:public"],
)
"""
_NCCL_ARCHIVE_STUB_BUILD_CONTENT = """
filegroup(
name = "LICENSE",
data = ["@nccl_archive//:LICENSE.txt"],
visibility = ["//visibility:public"],
)
alias(
name = "nccl",
actual = select({
"@local_config_cuda//cuda:hermetic_cuda_tools_and_libs": "@cuda_nccl//:nccl",
"//conditions:default": "@nccl_archive//:nccl_via_stub",
}),
visibility = ["//visibility:public"],
)
alias(
name = "nccl_headers",
actual = select({
"@local_config_cuda//cuda:hermetic_cuda_tools_and_libs": "@cuda_nccl//:headers",
"//conditions:default": "@nccl_archive//:nccl_headers",
}),
visibility = ["//visibility:public"],
)
cc_library(
name = "hermetic_nccl_config",
hdrs = ["nccl_config.h"],
include_prefix = "third_party/nccl",
visibility = ["//visibility:public"],
)
alias(
name = "nccl_config",
actual = select({
"@local_config_cuda//cuda:hermetic_cuda_tools_and_libs": ":hermetic_nccl_config",
"//conditions:default": "@nccl_archive//:nccl_config",
}),
visibility = ["//visibility:public"],
)
"""
def _create_local_nccl_repository(repository_ctx):
cuda_version = get_cuda_version(repository_ctx).split(".")[:2]
nccl_version = repository_ctx.read(repository_ctx.attr.nccl_version)
if get_host_environ(repository_ctx, _TF_NCCL_USE_STUB, "0") == "0":
repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)
else:
repository_ctx.file("BUILD", _NCCL_ARCHIVE_STUB_BUILD_CONTENT)
repository_ctx.template("generated_names.bzl", repository_ctx.attr.generated_names_tpl, {})
repository_ctx.template(
"build_defs.bzl",
repository_ctx.attr.build_defs_tpl,
{
"%{cuda_version}": "(%s, %s)" % tuple(cuda_version),
"%{nvlink_label}": "@cuda_nvcc//:nvlink",
"%{fatbinary_label}": "@cuda_nvcc//:fatbinary",
"%{bin2c_label}": "@cuda_nvcc//:bin2c",
"%{link_stub_label}": "@cuda_nvcc//:link_stub",
"%{nvprune_label}": "@cuda_nvprune//:nvprune",
},
)
repository_ctx.file("nccl_config.h", "#define TF_NCCL_VERSION \"%s\"" % nccl_version)
def _nccl_autoconf_impl(repository_ctx):
if (not enable_cuda(repository_ctx) or
get_cpu_value(repository_ctx) != "Linux"):
# Add a dummy build file to make bazel query happy.
repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
repository_ctx.file("nccl_config.h", "#define TF_NCCL_VERSION \"\"")
else:
_create_local_nccl_repository(repository_ctx)
_ENVIRONS = [
TF_NEED_CUDA,
TF_CUDA_VERSION,
_TF_NCCL_USE_STUB,
HERMETIC_CUDA_VERSION,
"LOCAL_NCCL_PATH",
]
nccl_configure = repository_rule(
environ = _ENVIRONS,
implementation = _nccl_autoconf_impl,
attrs = {
"environ": attr.string_dict(),
"nccl_version": attr.label(default = Label("@cuda_nccl//:version.txt")),
"generated_names_tpl": attr.label(default = Label("//third_party/nccl:generated_names.bzl.tpl")),
"build_defs_tpl": attr.label(default = Label("//third_party/nccl:build_defs.bzl.tpl")),
},
)
"""Downloads and configures the hermetic NCCL configuration.
Add the following to your WORKSPACE file:
```python
nccl_configure(name = "local_config_nccl")
```
Args:
name: A unique name for this workspace rule.
""" # buildifier: disable=no-effect

View File

@@ -0,0 +1,145 @@
# Copyright 2024 The TensorFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hermetic NCCL repositories initialization. Consult the WORKSPACE on how to use it."""
load("//third_party:repo.bzl", "tf_mirror_urls")
load(
"//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
"OS_ARCH_DICT",
"create_build_file",
"create_dummy_build_file",
"get_archive_name",
"get_env_var",
"get_lib_name_to_version_dict",
"get_major_library_version",
"get_version_and_template_lists",
"use_local_path",
)
load(
"//third_party/gpus/cuda/hermetic:cuda_redist_versions.bzl",
"CUDA_NCCL_WHEELS",
"REDIST_VERSIONS_TO_BUILD_TEMPLATES",
)
def _use_downloaded_nccl_wheel(repository_ctx):
# buildifier: disable=function-docstring-args
""" Downloads NCCL wheel and inits hermetic NCCL repository."""
cuda_version = (get_env_var(repository_ctx, "HERMETIC_CUDA_VERSION") or
get_env_var(repository_ctx, "TF_CUDA_VERSION"))
major_version = ""
if not cuda_version:
# If no CUDA version is found, comment out cc_import targets.
create_dummy_build_file(repository_ctx)
repository_ctx.file("version.txt", major_version)
return
# Download archive only when GPU config is used.
arch = OS_ARCH_DICT[repository_ctx.os.arch]
dict_key = "{cuda_version}-{arch}".format(
cuda_version = cuda_version,
arch = arch,
)
supported_versions = repository_ctx.attr.url_dict.keys()
if dict_key not in supported_versions:
fail(
("The supported NCCL versions are {supported_versions}." +
" Please provide a supported version in HERMETIC_CUDA_VERSION" +
" environment variable or add NCCL distribution for" +
" CUDA version={version}, OS={arch}.")
.format(
supported_versions = supported_versions,
version = cuda_version,
arch = arch,
),
)
sha256 = repository_ctx.attr.sha256_dict[dict_key]
url = repository_ctx.attr.url_dict[dict_key]
archive_name = get_archive_name(url)
file_name = archive_name + ".zip"
print("Downloading and extracting {}".format(url)) # buildifier: disable=print
repository_ctx.download(
url = tf_mirror_urls(url),
output = file_name,
sha256 = sha256,
)
repository_ctx.extract(
archive = file_name,
stripPrefix = repository_ctx.attr.strip_prefix,
)
repository_ctx.delete(file_name)
lib_name_to_version_dict = get_lib_name_to_version_dict(repository_ctx)
major_version = get_major_library_version(
repository_ctx,
lib_name_to_version_dict,
)
create_build_file(
repository_ctx,
lib_name_to_version_dict,
major_version,
)
repository_ctx.file("version.txt", major_version)
def _use_local_nccl_path(repository_ctx, local_nccl_path):
# buildifier: disable=function-docstring-args
""" Creates symlinks and initializes hermetic NCCL repository."""
use_local_path(repository_ctx, local_nccl_path, ["include", "lib"])
def _cuda_nccl_repo_impl(repository_ctx):
local_nccl_path = get_env_var(repository_ctx, "LOCAL_NCCL_PATH")
if local_nccl_path:
_use_local_nccl_path(repository_ctx, local_nccl_path)
else:
_use_downloaded_nccl_wheel(repository_ctx)
cuda_nccl_repo = repository_rule(
implementation = _cuda_nccl_repo_impl,
attrs = {
"sha256_dict": attr.string_dict(mandatory = True),
"url_dict": attr.string_dict(mandatory = True),
"versions": attr.string_list(mandatory = True),
"build_templates": attr.label_list(mandatory = True),
"strip_prefix": attr.string(),
},
environ = ["HERMETIC_CUDA_VERSION", "TF_CUDA_VERSION", "LOCAL_NCCL_PATH"],
)
def nccl_redist_init_repository(
cuda_nccl_wheels = CUDA_NCCL_WHEELS,
redist_versions_to_build_templates = REDIST_VERSIONS_TO_BUILD_TEMPLATES):
# buildifier: disable=function-docstring-args
"""Initializes NCCL repository."""
nccl_artifacts_dict = {"sha256_dict": {}, "url_dict": {}}
for cuda_version, nccl_wheel_info in cuda_nccl_wheels.items():
for arch in OS_ARCH_DICT.values():
if arch in nccl_wheel_info.keys():
cuda_version_to_arch_key = "%s-%s" % (cuda_version, arch)
nccl_artifacts_dict["sha256_dict"][cuda_version_to_arch_key] = nccl_wheel_info[arch].get("sha256", "")
nccl_artifacts_dict["url_dict"][cuda_version_to_arch_key] = nccl_wheel_info[arch]["url"]
repo_data = redist_versions_to_build_templates["cuda_nccl"]
versions, templates = get_version_and_template_lists(
repo_data["version_to_template"],
)
cuda_nccl_repo(
name = repo_data["repo_name"],
sha256_dict = nccl_artifacts_dict["sha256_dict"],
url_dict = nccl_artifacts_dict["url_dict"],
versions = versions,
build_templates = templates,
strip_prefix = "nvidia/nccl",
)
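As with the CUDA and CUDNN repositories, `LOCAL_NCCL_PATH` short-circuits the wheel download; the directory only needs the `include` and `lib` subdirectories that `use_local_path` symlinks. A sketch with an assumed path:

```
# User .bazelrc sketch: use a locally installed NCCL instead of the wheel.
# /opt/nccl is an assumed example location containing include/ and lib/.
build:cuda --repo_env=LOCAL_NCCL_PATH="/opt/nccl"
```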

View File

@@ -1,5 +1,7 @@
"""Repository rule for NCCL configuration.
NB: DEPRECATED! Use `hermetic/nccl_configure` rule instead.
`nccl_configure` depends on the following environment variables:
* `TF_NCCL_VERSION`: Installed NCCL version or empty to build from source.
@@ -8,7 +10,6 @@
files.
* `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is
`/usr/local/cuda,usr/`.
* `TF_CUDA_CLANG`: "1" if using Clang, "0" if using NVCC.
* `TF_NCCL_USE_STUB`: "1" if a NCCL stub that loads NCCL dynamically should
be used, "0" if NCCL should be linked in statically.
@@ -33,7 +34,6 @@ _TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
_TF_NCCL_VERSION = "TF_NCCL_VERSION"
_TF_NEED_CUDA = "TF_NEED_CUDA"
_TF_CUDA_PATHS = "TF_CUDA_PATHS"
_TF_CUDA_CLANG = "TF_CUDA_CLANG"
_TF_NCCL_USE_STUB = "TF_NCCL_USE_STUB"
_DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR"
@@ -129,7 +129,11 @@ def _create_local_nccl_repository(repository_ctx):
_label("build_defs.bzl.tpl"),
{
"%{cuda_version}": "(%s, %s)" % tuple(cuda_version),
"%{cuda_clang}": repr(get_host_environ(repository_ctx, _TF_CUDA_CLANG)),
"%{nvlink_label}": "@local_config_cuda//cuda:cuda/bin/nvlink",
"%{fatbinary_label}": "@local_config_cuda//cuda:cuda/bin/fatbinary",
"%{bin2c_label}": "@local_config_cuda//cuda:cuda/bin/bin2c",
"%{link_stub_label}": "@local_config_cuda//cuda:cuda/bin/crt/link.stub",
"%{nvprune_label}": "@local_config_cuda//cuda:cuda/bin/nvprune",
},
)
else:
@@ -181,7 +185,6 @@ _ENVIRONS = [
_TF_CUDA_COMPUTE_CAPABILITIES,
_TF_NEED_CUDA,
_TF_CUDA_PATHS,
_TF_CUDA_CLANG,
]
remote_nccl_configure = repository_rule(

View File

@@ -219,11 +219,16 @@ build:mkl_aarch64_threadpool -c opt
build:cuda --repo_env TF_NEED_CUDA=1
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --@local_config_cuda//:enable_cuda
# Default CUDA and CUDNN versions.
build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
build:cuda --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29"
# This flag is needed to include hermetic CUDA libraries for bazel tests.
test:cuda --@local_config_cuda//cuda:include_hermetic_cuda_libs=true
# CUDA: This config refers to building CUDA op kernels with clang.
build:cuda_clang --config=cuda
build:cuda_clang --action_env=TF_CUDA_CLANG="1"
build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
build:cuda_clang --copt=-Qunused-arguments
# Select supported compute capabilities (supported graphics cards).
# This is the same as the official TensorFlow builds.
# See https://developer.nvidia.com/cuda-gpus#compute
@@ -232,22 +237,22 @@ build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
# release while SASS is only forward compatible inside the current
# major release. Example: sm_80 kernels can run on sm_89 GPUs but
# not on sm_90 GPUs. compute_80 kernels though can also run on sm_90 GPUs.
build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
build:cuda_clang --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
# Set lld as the linker.
build:cuda_clang --host_linkopt="-fuse-ld=lld"
build:cuda_clang --host_linkopt="-lm"
build:cuda_clang --linkopt="-fuse-ld=lld"
build:cuda_clang --linkopt="-lm"
# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
build:cuda_clang_official --config=cuda_clang
build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
build:cuda_clang_official --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
build:cuda_clang_official --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29"
build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3"
build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
build:cuda_clang_official --crosstool_top="@sigbuild-r2.17-clang_config_cuda//crosstool:toolchain"
# Build with nvcc for CUDA and clang for host
build:nvcc_clang --config=cuda
# Unfortunately, cuda_configure.bzl demands this for using nvcc + clang
build:nvcc_clang --action_env=TF_CUDA_CLANG="1"
build:nvcc_clang --action_env=TF_NVCC_CLANG="1"
build:nvcc_clang --@local_config_cuda//:cuda_compiler=nvcc
@@ -543,9 +548,6 @@ build:rbe_linux_cuda --config=cuda_clang_official
build:rbe_linux_cuda --config=rbe_linux_cpu
# For Remote build execution -- GPU configuration
build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.17-clang_config_cuda"
build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.17-clang_config_nccl"
test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
build:rbe_linux_cuda_nvcc --config=nvcc_clang
@@ -630,7 +632,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/cla
# Test-related settings below this point.
test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
test:release_linux_base --local_test_jobs=HOST_CPUS
test:release_linux_base --test_env=LD_LIBRARY_PATH
# Give only the list of failed tests at the end of the log
test:release_linux_base --test_summary=short
@@ -644,7 +645,6 @@ build:release_gpu_linux --config=release_cpu_linux
# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
# Note that linux cpu and cuda builds share the same toolchain now.
build:release_gpu_linux --config=cuda_clang_official
test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
# Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think
test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute
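With the `.bazelrc` wired up this way, switching toolkit versions no longer means editing `action_env` paths; overriding the two `repo_env` values is enough. A sketch (the versions are example values taken from the redist JSON dicts added in this commit):

```
# Sketch: override the default hermetic versions in a user .bazelrc.
build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.1.1"
build:cuda --repo_env=HERMETIC_CUDNN_VERSION="9.1.1"
```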

View File

@@ -52,3 +52,50 @@
xla_workspace1()
load(":workspace0.bzl", "xla_workspace0")
xla_workspace0()
load(
"@local_tsl//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
"cuda_json_init_repository",
)
cuda_json_init_repository()
load(
"@cuda_redist_json//:distributions.bzl",
"CUDA_REDISTRIBUTIONS",
"CUDNN_REDISTRIBUTIONS",
)
load(
"@local_tsl//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
"cuda_redist_init_repositories",
"cudnn_redist_init_repository",
)
cuda_redist_init_repositories(
cuda_redistributions = CUDA_REDISTRIBUTIONS,
)
cudnn_redist_init_repository(
cudnn_redistributions = CUDNN_REDISTRIBUTIONS,
)
load(
"@local_tsl//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
"cuda_configure",
)
cuda_configure(name = "local_config_cuda")
load(
"@local_tsl//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
"nccl_redist_init_repository",
)
nccl_redist_init_repository()
load(
"@local_tsl//third_party/nccl/hermetic:nccl_configure.bzl",
"nccl_configure",
)
nccl_configure(name = "local_config_nccl")

View File

@@ -390,6 +390,14 @@ def main():
"github/xla/.bazelrc",
],
)
sh(
[
"sed",
"-i",
r"s/8\.9\.7\.29/9.1.1/g",
"github/xla/.bazelrc",
],
)
sh(["nvidia-smi"]) sh(["nvidia-smi"])
build.docker_image.pull_and_run( build.docker_image.pull_and_run(

View File

@@ -33,6 +33,7 @@ py_test(
data = [
"testdata/clang.bazelrc",
"testdata/cuda_clang.bazelrc",
"testdata/default_cuda_clang.bazelrc",
"testdata/gcc.bazelrc",
"testdata/nvcc_clang.bazelrc",
"testdata/nvcc_gcc.bazelrc",

View File

@@ -27,11 +27,6 @@ Example usage:
the clang in your path. If that isn't the correct clang, you can override like
`./configure.py --backend=cpu --clang_path=<PATH_TO_YOUR_CLANG>`.
NOTE(ddunleavy): Lots of these things should probably be outside of configure.py
but are here because of complexity in `cuda_configure.bzl` and the TF bazelrc.
Once XLA has it's own bazelrc, and cuda_configure.bzl is replaced or refactored,
we can probably make this file smaller.
TODO(ddunleavy): add more thorough validation.
"""
import argparse
@@ -45,18 +40,9 @@ import subprocess
import sys
from typing import Optional
_REQUIRED_CUDA_LIBRARIES = ["cublas", "cuda", "cudnn"]
_DEFAULT_BUILD_AND_TEST_TAG_FILTERS = ("-no_oss",)
# Assume we are being invoked from the symlink at the root of the repo
_XLA_SRC_ROOT = pathlib.Path(__file__).absolute().parent
_FIND_CUDA_CONFIG = str(
_XLA_SRC_ROOT
/ "third_party"
/ "tsl"
/ "third_party"
/ "gpus"
/ "find_cuda_config.py"
)
_XLA_BAZELRC_NAME = "xla_configure.bazelrc"
_KW_ONLY_IF_PYTHON310 = {"kw_only": True} if sys.version_info >= (3, 10) else {}
@@ -224,11 +210,12 @@ class DiscoverablePathsAndVersions:
ld_library_path: Optional[str] = None
# CUDA specific
cublas_version: Optional[str] = None
cuda_version: Optional[str] = None
cuda_toolkit_path: Optional[str] = None
cuda_compute_capabilities: Optional[list[str]] = None
cudnn_version: Optional[str] = None
nccl_version: Optional[str] = None
local_cuda_path: Optional[str] = None
local_cudnn_path: Optional[str] = None
local_nccl_path: Optional[str] = None
def get_relevant_paths_and_versions(self, config: "XLAConfigOptions"):
"""Gets paths and versions as needed by the config.
@@ -247,7 +234,7 @@
)
# Notably, we don't use `_find_executable_or_die` for lld, as it changes
# which commands it accepts based on it's name! ld.lld is symlinked to a
# which commands it accepts based on its name! ld.lld is symlinked to a
# different executable just called lld, which should not be invoked
# directly.
self.lld_path = self.lld_path or shutil.which("ld.lld")
@@ -261,64 +248,6 @@ class DiscoverablePathsAndVersions:
if not self.cuda_compute_capabilities:
self.cuda_compute_capabilities = _get_cuda_compute_capabilities_or_die()
self._get_cuda_libraries_paths_and_versions_if_needed(config)
def _get_cuda_libraries_paths_and_versions_if_needed(
self, config: "XLAConfigOptions"
):
"""Gets cuda paths and versions if user left any unspecified.
This uses `find_cuda_config.py` to find versions for all libraries in
`_REQUIRED_CUDA_LIBRARIES`.
Args:
config: config that determines which libraries should be found.
"""
should_find_nccl = config.using_nccl and self.nccl_version is None
any_cuda_config_unset = any([
self.cublas_version is None,
self.cuda_toolkit_path is None,
self.cudnn_version is None,
should_find_nccl,
])
maybe_nccl = ["nccl"] if should_find_nccl else []
if any_cuda_config_unset:
logging.info(
"Some CUDA config versions and paths were not provided, "
"so trying to find them using find_cuda_config.py"
)
try:
find_cuda_config_proc = subprocess.run(
[
sys.executable,
_FIND_CUDA_CONFIG,
*_REQUIRED_CUDA_LIBRARIES,
*maybe_nccl,
],
capture_output=True,
check=True,
text=True,
)
except subprocess.CalledProcessError as e:
logging.info("Command %s failed. Is CUDA installed?", e.cmd)
logging.info("Dumping %s ouptut:\n %s", e.cmd, e.output)
raise e
cuda_config = dict(
tuple(line.split(": "))
for line in find_cuda_config_proc.stdout.strip().split("\n")
)
self.cublas_version = self.cublas_version or cuda_config["cublas_version"]
self.cuda_toolkit_path = (
self.cuda_toolkit_path or cuda_config["cuda_toolkit_path"]
)
self.cudnn_version = self.cudnn_version or cuda_config["cudnn_version"]
if should_find_nccl:
self.nccl_version = self.nccl_version or cuda_config["nccl_version"]
@dataclasses.dataclass(frozen=True, **_KW_ONLY_IF_PYTHON310)
class XLAConfigOptions:
@@ -391,18 +320,31 @@
)
# Lines needed for CUDA backend regardless of CUDA/host compiler
rc.append(
f"build --action_env CUDA_TOOLKIT_PATH={dpav.cuda_toolkit_path}"
)
rc.append(f"build --action_env TF_CUBLAS_VERSION={dpav.cublas_version}")
rc.append(
"build --action_env"
f" TF_CUDA_COMPUTE_CAPABILITIES={','.join(dpav.cuda_compute_capabilities)}"
)
rc.append(f"build --action_env TF_CUDNN_VERSION={dpav.cudnn_version}")
if self.using_nccl:
rc.append(f"build --action_env TF_NCCL_VERSION={dpav.nccl_version}")
else:
if dpav.cuda_version:
rc.append(
f"build:cuda --repo_env HERMETIC_CUDA_VERSION={dpav.cuda_version}"
)
rc.append(
"build:cuda --repo_env"
f" HERMETIC_CUDA_COMPUTE_CAPABILITIES={','.join(dpav.cuda_compute_capabilities)}"
)
if dpav.cudnn_version:
rc.append(
f"build:cuda --repo_env HERMETIC_CUDNN_VERSION={dpav.cudnn_version}"
)
if dpav.local_cuda_path:
rc.append(
f"build:cuda --repo_env LOCAL_CUDA_PATH={dpav.local_cuda_path}"
)
if dpav.local_cudnn_path:
rc.append(
f"build:cuda --repo_env LOCAL_CUDNN_PATH={dpav.local_cudnn_path}"
)
if dpav.local_nccl_path:
rc.append(
f"build:cuda --repo_env LOCAL_NCCL_PATH={dpav.local_nccl_path}"
)
if not self.using_nccl:
rc.append("build --config nonccl")
elif self.backend == Backend.ROCM:
pass
@@ -489,13 +431,35 @@ def _parse_args():
parser.add_argument("--lld_path", help=path_help)
# CUDA specific
find_cuda_config_help = (
"Optional: will be found using `find_cuda_config.py` if flag is not set."
)
parser.add_argument("--cublas_version", help=find_cuda_config_help)
parser.add_argument("--cuda_toolkit_path", help=find_cuda_config_help)
parser.add_argument("--cudnn_version", help=find_cuda_config_help)
parser.add_argument("--nccl_version", help=find_cuda_config_help)
parser.add_argument(
"--cuda_version",
help="Optional: CUDA will be downloaded by Bazel if the flag is set",
)
parser.add_argument(
"--cudnn_version",
help="Optional: CUDNN will be downloaded by Bazel if the flag is set",
)
parser.add_argument(
"--local_cuda_path",
help=(
"Optional: Local CUDA dir will be used in dependencies if the flag"
" is set"
),
)
parser.add_argument(
"--local_cudnn_path",
help=(
"Optional: Local CUDNN dir will be used in dependencies if the flag"
" is set"
),
)
parser.add_argument(
"--local_nccl_path",
help=(
"Optional: Local NCCL dir will be used in dependencies if the flag"
" is set"
),
)
return parser.parse_args()
@@ -523,11 +487,12 @@ def main():
gcc_path=args.gcc_path,
lld_path=args.lld_path,
ld_library_path=args.ld_library_path,
cublas_version=args.cublas_version,
cuda_compute_capabilities=args.cuda_compute_capabilities,
cuda_toolkit_path=args.cuda_toolkit_path,
cudnn_version=args.cudnn_version,
nccl_version=args.nccl_version,
cuda_version=args.cuda_version,
cudnn_version=args.cudnn_version,
cuda_compute_capabilities=args.cuda_compute_capabilities,
local_cuda_path=args.local_cuda_path,
local_cudnn_path=args.local_cudnn_path,
local_nccl_path=args.local_nccl_path,
)
)
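Put together, the new flags replace the old `find_cuda_config.py` discovery step. A hedged invocation sketch (the version numbers are example values from the supported redist lists; `--backend=CUDA` as shown in the docs below):

```
./configure.py --backend=CUDA --cuda_version=12.3.2 --cudnn_version=8.9.7.29
```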

View File

@@ -34,12 +34,20 @@ _COMPILER_OPTIONS = ("-Wno-sign-compare",)
# CUDA specific paths and versions
_CUDA_SPECIFIC_PATHS_AND_VERSIONS = {
"cublas_version": "12.3",
"cuda_version": '"12.1.1"',
"cuda_toolkit_path": "/usr/local/cuda-12.2",
"cuda_compute_capabilities": ["7.5"],
"cudnn_version": "8",
"cudnn_version": '"8.6"',
"ld_library_path": "/usr/local/nvidia/lib:/usr/local/nvidia/lib64",
"nccl_version": "2",
}
_CUDA_COMPUTE_CAPABILITIES_AND_LD_LIBRARY_PATH = {
"cuda_compute_capabilities": [
"sm_50",
"sm_60",
"sm_70",
"sm_80",
"compute_90",
],
"ld_library_path": "/usr/local/nvidia/lib:/usr/local/nvidia/lib64",
}
@@ -66,6 +74,11 @@ class ConfigureTest(absltest.TestCase):
with (testdata / "cuda_clang.bazelrc").open() as f:
cls.cuda_clang_bazelrc_lines = [line.strip() for line in f.readlines()]
with (testdata / "default_cuda_clang.bazelrc").open() as f:
cls.default_cuda_clang_bazelrc_lines = [
line.strip() for line in f.readlines()
]
with (testdata / "nvcc_clang.bazelrc").open() as f:
cls.nvcc_clang_bazelrc_lines = [line.strip() for line in f.readlines()]
@@ -138,6 +151,27 @@
self.assertEqual(bazelrc_lines, self.cuda_clang_bazelrc_lines)
def test_default_cuda_clang_bazelrc(self):
config = XLAConfigOptions(
backend=Backend.CUDA,
os=OS.LINUX,
python_bin_path=_PYTHON_BIN_PATH,
host_compiler=HostCompiler.CLANG,
compiler_options=list(_COMPILER_OPTIONS),
cuda_compiler=CudaCompiler.CLANG,
using_nccl=False,
)
bazelrc_lines = config.to_bazelrc_lines(
DiscoverablePathsAndVersions(
clang_path=_CLANG_PATH,
clang_major_version=17,
**_CUDA_COMPUTE_CAPABILITIES_AND_LD_LIBRARY_PATH,
)
)
self.assertEqual(bazelrc_lines, self.default_cuda_clang_bazelrc_lines)
def test_nvcc_clang_bazelrc(self):
config = XLAConfigOptions(
backend=Backend.CUDA,

View File

@@ -3,10 +3,9 @@ build --repo_env CC=/usr/lib/llvm-18/bin/clang
build --repo_env BAZEL_COMPILER=/usr/lib/llvm-18/bin/clang
build --config cuda_clang
build --action_env CLANG_CUDA_COMPILER_PATH=/usr/lib/llvm-18/bin/clang
build --action_env CUDA_TOOLKIT_PATH=/usr/local/cuda-12.2
build --action_env TF_CUBLAS_VERSION=12.3
build --action_env TF_CUDA_COMPUTE_CAPABILITIES=7.5
build --action_env TF_CUDNN_VERSION=8
build:cuda --repo_env HERMETIC_CUDA_VERSION="12.1.1"
build:cuda --repo_env HERMETIC_CUDA_COMPUTE_CAPABILITIES=7.5
build:cuda --repo_env HERMETIC_CUDNN_VERSION="8.6"
build --config nonccl
build --action_env LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
build --action_env PYTHON_BIN_PATH=/usr/bin/python3

View File

@ -0,0 +1,19 @@
build --action_env CLANG_COMPILER_PATH=/usr/lib/llvm-18/bin/clang
build --repo_env CC=/usr/lib/llvm-18/bin/clang
build --repo_env BAZEL_COMPILER=/usr/lib/llvm-18/bin/clang
build --config cuda_clang
build --action_env CLANG_CUDA_COMPILER_PATH=/usr/lib/llvm-18/bin/clang
build:cuda --repo_env HERMETIC_CUDA_COMPUTE_CAPABILITIES=sm_50,sm_60,sm_70,sm_80,compute_90
build --config nonccl
build --action_env LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
build --action_env PYTHON_BIN_PATH=/usr/bin/python3
build --python_path /usr/bin/python3
test --test_env LD_LIBRARY_PATH
test --test_size_filters small,medium
build --copt -Wno-sign-compare
build --copt -Wno-error=unused-command-line-argument
build --copt -Wno-gnu-offsetof-extensions
build --build_tag_filters -no_oss
build --test_tag_filters -no_oss
test --build_tag_filters -no_oss
test --test_tag_filters -no_oss

View File

@ -3,10 +3,9 @@ build --repo_env CC=/usr/lib/llvm-18/bin/clang
 build --repo_env BAZEL_COMPILER=/usr/lib/llvm-18/bin/clang
 build --config nvcc_clang
 build --action_env CLANG_CUDA_COMPILER_PATH=/usr/lib/llvm-18/bin/clang
-build --action_env CUDA_TOOLKIT_PATH=/usr/local/cuda-12.2
-build --action_env TF_CUBLAS_VERSION=12.3
-build --action_env TF_CUDA_COMPUTE_CAPABILITIES=7.5
-build --action_env TF_CUDNN_VERSION=8
+build:cuda --repo_env HERMETIC_CUDA_VERSION="12.1.1"
+build:cuda --repo_env HERMETIC_CUDA_COMPUTE_CAPABILITIES=7.5
+build:cuda --repo_env HERMETIC_CUDNN_VERSION="8.6"
 build --config nonccl
 build --action_env LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
 build --action_env PYTHON_BIN_PATH=/usr/bin/python3

View File

@ -1,9 +1,8 @@
 build --action_env GCC_HOST_COMPILER_PATH=/usr/bin/gcc
 build --config cuda
-build --action_env CUDA_TOOLKIT_PATH=/usr/local/cuda-12.2
-build --action_env TF_CUBLAS_VERSION=12.3
-build --action_env TF_CUDA_COMPUTE_CAPABILITIES=7.5
-build --action_env TF_CUDNN_VERSION=8
+build:cuda --repo_env HERMETIC_CUDA_VERSION="12.1.1"
+build:cuda --repo_env HERMETIC_CUDA_COMPUTE_CAPABILITIES=7.5
+build:cuda --repo_env HERMETIC_CUDNN_VERSION="8.6"
 build --config nonccl
 build --action_env LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
 build --action_env PYTHON_BIN_PATH=/usr/bin/python3

View File

@ -65,12 +65,11 @@ docker exec xla_gpu ./configure.py --backend=CUDA
 docker exec xla_gpu bazel build --test_output=all --spawn_strategy=sandboxed //xla/...
 ```

-If you want to build XLA targets with GPU support without Docker you need to
-install the following additional dependencies:
-[`cuda-12.3`](https://developer.nvidia.com/cuda-downloads),
-[`cuDNN-8.9`](https://developer.nvidia.com/cudnn).
+For more details regarding
+[TensorFlow's GPU docker images you can check out this document.](https://www.tensorflow.org/install/source#gpu_support_3)

-Then configure and build targets using the following commands:
+You can build XLA targets with GPU support without Docker as well. Configure and
+build targets using the following commands:

 ```
 ./configure.py --backend=CUDA
@ -79,4 +78,4 @@ bazel build --test_output=all --spawn_strategy=sandboxed //xla/...
 ```

 For more details regarding
-[TensorFlow's GPU docker images you can check out this document.](https://www.tensorflow.org/install/source#gpu_support_3)
+[hermetic CUDA you can check out this document.](docs/hermetic_cuda.md)

View File

@ -64,6 +64,16 @@ docker exec xla ./configure.py --backend=CUDA
docker exec xla bazel build --test_output=all --spawn_strategy=sandboxed //xla/...
```
**NB:** please note that with hermetic CUDA rules, you don't have to build XLA
in Docker. You can build XLA for GPU on your machine without GPUs and without
NVIDIA driver installed:
```sh
./configure.py --backend=CUDA
bazel build --test_output=all --spawn_strategy=sandboxed //xla/...
```
Your first build will take quite a while because it has to build the entire
stack, including XLA, MLIR, and StableHLO.

third_party/xla/docs/hermetic_cuda.md vendored Normal file
View File

@ -0,0 +1,544 @@
# Hermetic CUDA overview
Hermetic CUDA uses a specific downloadable version of CUDA instead of the
user's locally installed CUDA. Bazel will download CUDA, CUDNN and NCCL
distributions, and then use CUDA libraries and tools as dependencies in various
Bazel targets. This enables more reproducible builds for Google ML projects and
their supported CUDA versions.
## Supported hermetic CUDA, CUDNN versions
The supported CUDA versions are specified in the `CUDA_REDIST_JSON_DICT`
dictionary in
[third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl](https://github.com/openxla/xla/blob/main/third_party/tsl/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl).
The supported CUDNN versions are specified in the `CUDNN_REDIST_JSON_DICT`
dictionary in
[third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl](https://github.com/openxla/xla/blob/main/third_party/tsl/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl).
The `.bazelrc` files of individual projects set the `HERMETIC_CUDA_VERSION` and
`HERMETIC_CUDNN_VERSION` environment variables to the versions used by default
when `--config=cuda` is specified in the Bazel command options.
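For instance, a quick way to see which defaults a project picks up is to grep
its `.bazelrc`; a sketch, assuming you are in the project root:
```sh
# Show the default hermetic CUDA/CUDNN versions wired into --config=cuda.
grep -E 'HERMETIC_CUDA_VERSION|HERMETIC_CUDNN_VERSION' .bazelrc
```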
## Environment variables controlling the hermetic CUDA/CUDNN versions
The `HERMETIC_CUDA_VERSION` environment variable should consist of the major,
minor and patch CUDA version, e.g. `12.3.2`.
The `HERMETIC_CUDNN_VERSION` environment variable should consist of the major,
minor and patch CUDNN version, e.g. `9.1.1`.
There are three ways to set the environment variables for Bazel commands:
```
# Add an entry to your `.bazelrc` file
build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
build:cuda --repo_env=HERMETIC_CUDNN_VERSION="9.1.1"
# OR pass it directly to your specific build command
bazel build --config=cuda <target> \
--repo_env=HERMETIC_CUDA_VERSION="12.3.2" \
--repo_env=HERMETIC_CUDNN_VERSION="9.1.1"
# OR set the environment variables globally in your shell:
export HERMETIC_CUDA_VERSION="12.3.2"
export HERMETIC_CUDNN_VERSION="9.1.1"
```
If `HERMETIC_CUDA_VERSION` and `HERMETIC_CUDNN_VERSION` are not present, the
hermetic CUDA/CUDNN repository rules will look up the `TF_CUDA_VERSION` and
`TF_CUDNN_VERSION` environment variable values. This is done for backward
compatibility with the non-hermetic CUDA/CUDNN repository rules.
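For example, a minimal sketch of relying on this fallback, assuming the
TF-style variables carry the same full `major.minor.patch` values that the
hermetic rules expect:
```sh
# With no HERMETIC_* variables set, the hermetic repository rules fall back
# to the TF-style variables below.
bazel build --config=cuda <target> \
    --repo_env=TF_CUDA_VERSION="12.3.2" \
    --repo_env=TF_CUDNN_VERSION="9.1.1"
```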
The mapping between the CUDA version and the NCCL distribution version to be
downloaded is specified in
[third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl](https://github.com/openxla/xla/blob/main/third_party/tsl/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl).
## Upgrade hermetic CUDA/CUDNN version
1. Create and submit a pull request with updated `CUDA_REDIST_JSON_DICT` and
`CUDNN_REDIST_JSON_DICT` dictionaries in
[third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl](https://github.com/openxla/xla/blob/main/third_party/tsl/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl).
Update `CUDA_NCCL_WHEELS` in
[third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl](https://github.com/openxla/xla/blob/main/third_party/tsl/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl)
if needed.
Update `REDIST_VERSIONS_TO_BUILD_TEMPLATES` in
[third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl](https://github.com/openxla/xla/blob/main/third_party/tsl/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl)
if needed.
2. For RBE executions: update `TF_CUDA_VERSION` and/or `TF_CUDNN_VERSION` in
[toolchains/remote_config/rbe_config.bzl](https://github.com/openxla/xla/blob/main/third_party/tsl/tools/toolchains/remote_config/rbe_config.bzl).
3. For RBE executions: update `cuda_version`, `cudnn_version`, `TF_CUDA_VERSION`
and `TF_CUDNN_VERSION` in
[toolchains/remote_config/configs.bzl](https://github.com/openxla/xla/blob/main/tools/toolchains/remote_config/configs.bzl).
4. For each Google ML project, create a separate pull request with the updated
`HERMETIC_CUDA_VERSION` and `HERMETIC_CUDNN_VERSION` in the `.bazelrc` file.
The PR presubmit jobs will launch Bazel tests and download the hermetic
CUDA/CUDNN distributions. Verify that the presubmit jobs pass before
submitting the PR. You can also smoke-test the new versions locally first, as
in the sketch below.
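A minimal local check, with placeholder versions, might look like:
```sh
# Override the hermetic versions on the command line instead of editing any
# .bazelrc, then build a GPU target to force the new downloads.
bazel build --config=cuda <target> \
    --repo_env=HERMETIC_CUDA_VERSION="12.4.0" \
    --repo_env=HERMETIC_CUDNN_VERSION="9.1.1"
```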
## Pointing to CUDA/CUDNN/NCCL redistributions on local file system
You can use the local CUDA/CUDNN/NCCL dirs as a source of redistributions. The following additional environment variables are required:
```
LOCAL_CUDA_PATH
LOCAL_CUDNN_PATH
LOCAL_NCCL_PATH
```
Example:
```
# Add an entry to your `.bazelrc` file
build:cuda --repo_env=LOCAL_CUDA_PATH="/foo/bar/nvidia/cuda"
build:cuda --repo_env=LOCAL_CUDNN_PATH="/foo/bar/nvidia/cudnn"
build:cuda --repo_env=LOCAL_NCCL_PATH="/foo/bar/nvidia/nccl"
# OR pass it directly to your specific build command
bazel build --config=cuda <target> \
--repo_env=LOCAL_CUDA_PATH="/foo/bar/nvidia/cuda" \
--repo_env=LOCAL_CUDNN_PATH="/foo/bar/nvidia/cudnn" \
--repo_env=LOCAL_NCCL_PATH="/foo/bar/nvidia/nccl"
# OR set the environment variable globally in your shell:
export LOCAL_CUDA_PATH="/foo/bar/nvidia/cuda"
export LOCAL_CUDNN_PATH="/foo/bar/nvidia/cudnn"
export LOCAL_NCCL_PATH="/foo/bar/nvidia/nccl"
```
The structure of the folders inside the CUDA dir should be the following (as if the archived redistributions were unpacked into one place):
```
<LOCAL_CUDA_PATH>/
include/
bin/
lib/
nvvm/
```
The structure of the folders inside the CUDNN dir should be the following:
```
<LOCAL_CUDNN_PATH>
include/
lib/
```
The structure of the folders inside the NCCL dir should be the following:
```
<LOCAL_NCCL_PATH>
include/
lib/
```
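As an illustration, here is a sketch of assembling such a local CUDA directory
from downloaded archives; the archive names below are hypothetical examples,
not an exhaustive list:
```sh
# Unpack each CUDA redistribution archive into one directory so that their
# include/, bin/, lib/ and nvvm/ subfolders merge together.
export LOCAL_CUDA_PATH="/foo/bar/nvidia/cuda"
mkdir -p "$LOCAL_CUDA_PATH"
for archive in cuda_cudart-linux-x86_64-12.3.101-archive.tar.xz \
               cuda_nvcc-linux-x86_64-12.3.107-archive.tar.xz; do
  tar -xJf "$archive" --strip-components=1 -C "$LOCAL_CUDA_PATH"
done
```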
## Custom CUDA/CUDNN archives and NCCL wheels
There are three options that allow usage of custom CUDA/CUDNN distributions.
### Custom CUDA/CUDNN redistribution JSON files
This option allows you to use custom distributions for all CUDA/CUDNN
dependencies in Google ML projects.
1. Create `cuda_redist.json` and/or `cudnn_redist.json` files.
`cuda_redist.json` should follow the format below:
```
{
"cuda_cccl": {
"linux-x86_64": {
"relative_path": "cuda_cccl-linux-x86_64-12.4.99-archive.tar.xz",
},
"linux-sbsa": {
"relative_path": "cuda_cccl-linux-sbsa-12.4.99-archive.tar.xz",
}
},
}
```
`cudnn_redist.json` should follow the format below:
```
{
"cudnn": {
"linux-x86_64": {
"cuda12": {
"relative_path": "cudnn/linux-x86_64/cudnn-linux-x86_64-9.0.0.312_cuda12-archive.tar.xz",
}
},
"linux-sbsa": {
"cuda12": {
"relative_path": "cudnn/linux-sbsa/cudnn-linux-sbsa-9.0.0.312_cuda12-archive.tar.xz",
}
}
}
}
```
The `relative_path` field can be replaced with `full_path` for full URLs and
absolute local paths starting with `file:///`.
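For instance, a hypothetical entry rewritten to use `full_path` might look
like:
```
{
   "cuda_cccl": {
      "linux-x86_64": {
         "full_path": "file:///usr/Downloads/dists/cuda_cccl-linux-x86_64-12.4.99-archive.tar.xz",
      },
   },
}
```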
2. In the downstream project dependent on XLA, update the hermetic CUDA JSON
repository call in the `WORKSPACE` file. Both web links and local file paths
are allowed. Example:
```
_CUDA_JSON_DICT = {
"12.4.0": [
"file:///home/user/Downloads/redistrib_12.4.0_updated.json",
],
}
_CUDNN_JSON_DICT = {
"9.0.0": [
"https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_9.0.0.json",
],
}
cuda_json_init_repository(
cuda_json_dict = _CUDA_JSON_DICT,
cudnn_json_dict = _CUDNN_JSON_DICT,
)
```
If the JSON files contain relative paths to distributions, the path prefix
should be updated in the `cuda_redist_init_repositories()` and
`cudnn_redist_init_repository()` calls. Example:
```
cuda_redist_init_repositories(
cuda_redistributions = CUDA_REDISTRIBUTIONS,
cuda_redist_path_prefix = "file:///usr/Downloads/dists/",
)
```
### Custom CUDA/CUDNN distributions
This option allows you to use custom distributions for some CUDA/CUDNN
dependencies in Google ML projects.
1. In the downstream project dependent on XLA, remove the lines below:
```
<...>
"CUDA_REDIST_JSON_DICT",
<...>
"CUDNN_REDIST_JSON_DICT",
<...>
cuda_json_init_repository(
cuda_json_dict = CUDA_REDIST_JSON_DICT,
cudnn_json_dict = CUDNN_REDIST_JSON_DICT,
)
load(
"@cuda_redist_json//:distributions.bzl",
"CUDA_REDISTRIBUTIONS",
"CUDNN_REDISTRIBUTIONS",
)
```
2. In the same `WORKSPACE` file, create dictionaries with distribution paths.
The dictionary with CUDA distributions should follow the format below:
```
_CUSTOM_CUDA_REDISTRIBUTIONS = {
"cuda_cccl": {
"linux-x86_64": {
"relative_path": "cuda_cccl-linux-x86_64-12.4.99-archive.tar.xz",
},
"linux-sbsa": {
"relative_path": "cuda_cccl-linux-sbsa-12.4.99-archive.tar.xz",
}
},
}
```
The dictionary with CUDNN distributions should follow the format below:
```
_CUSTOM_CUDNN_REDISTRIBUTIONS = {
"cudnn": {
"linux-x86_64": {
"cuda12": {
"relative_path": "cudnn/linux-x86_64/cudnn-linux-x86_64-9.0.0.312_cuda12-archive.tar.xz",
}
},
"linux-sbsa": {
"cuda12": {
"relative_path": "cudnn/linux-sbsa/cudnn-linux-sbsa-9.0.0.312_cuda12-archive.tar.xz",
}
}
}
}
```
The `relative_path` field can be replaced with `full_path` for full URLs and
absolute local paths starting with `file:///`.
3. In the same `WORKSPACE` file, pass the created dictionaries to the repository
rules. If the dictionaries contain relative paths to distributions, the path
prefix should be updated in the `cuda_redist_init_repositories()` and
`cudnn_redist_init_repository()` calls.
```
cuda_redist_init_repositories(
cuda_redistributions = _CUSTOM_CUDA_REDISTRIBUTIONS,
cuda_redist_path_prefix = "file:///home/usr/Downloads/dists/",
)
cudnn_redist_init_repository(
cudnn_redistributions = _CUSTOM_CUDNN_REDISTRIBUTIONS,
cudnn_redist_path_prefix = "file:///home/usr/Downloads/dists/cudnn/"
)
```
### Combination of the options above
In the example below, `CUDA_REDIST_JSON_DICT` is merged with custom JSON data in
`_CUDA_JSON_DICT`, and `CUDNN_REDIST_JSON_DICT` is merged with
`_CUDNN_JSON_DICT`.
The distributions data in `_CUDA_DIST_DICT` overrides the content of resulting
CUDA JSON file, and the distributions data in `_CUDNN_DIST_DICT` overrides the
content of resulting CUDNN JSON file. The NCCL wheels data is merged from
`CUDA_NCCL_WHEELS` and `_NCCL_WHEEL_DICT`.
```
load(
//third_party/gpus/cuda/hermetic:cuda_redist_versions.bzl",
"CUDA_REDIST_PATH_PREFIX",
"CUDA_NCCL_WHEELS",
"CUDA_REDIST_JSON_DICT",
"CUDNN_REDIST_PATH_PREFIX",
"CUDNN_REDIST_JSON_DICT",
)
_CUDA_JSON_DICT = {
"12.4.0": [
"file:///usr/Downloads/redistrib_12.4.0_updated.json",
],
}
_CUDNN_JSON_DICT = {
"9.0.0": [
"https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_9.0.0.json",
],
}
cuda_json_init_repository(
cuda_json_dict = CUDA_REDIST_JSON_DICT | _CUDA_JSON_DICT,
cudnn_json_dict = CUDNN_REDIST_JSON_DICT | _CUDNN_JSON_DICT,
)
load(
"@cuda_redist_json//:distributions.bzl",
"CUDA_REDISTRIBUTIONS",
"CUDNN_REDISTRIBUTIONS",
)
load(
"//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
"cuda_redist_init_repositories",
"cudnn_redist_init_repository",
)
_CUDA_DIST_DICT = {
"cuda_cccl": {
"linux-x86_64": {
"relative_path": "cuda_cccl-linux-x86_64-12.4.99-archive.tar.xz",
},
"linux-sbsa": {
"relative_path": "cuda_cccl-linux-sbsa-12.4.99-archive.tar.xz",
},
},
"libcusolver": {
"linux-x86_64": {
"full_path": "file:///usr/Downloads/dists/libcusolver-linux-x86_64-11.6.0.99-archive.tar.xz",
},
"linux-sbsa": {
"relative_path": "libcusolver-linux-sbsa-11.6.0.99-archive.tar.xz",
},
},
}
_CUDNN_DIST_DICT = {
"cudnn": {
"linux-x86_64": {
"cuda12": {
"relative_path": "cudnn-linux-x86_64-9.0.0.312_cuda12-archive.tar.xz",
},
},
"linux-sbsa": {
"cuda12": {
"relative_path": "cudnn-linux-sbsa-9.0.0.312_cuda12-archive.tar.xz",
},
},
},
}
cuda_redist_init_repositories(
cuda_redistributions = CUDA_REDISTRIBUTIONS | _CUDA_DIST_DICT,
cuda_redist_path_prefix = "file:///usr/Downloads/dists/",
)
cudnn_redist_init_repository(
cudnn_redistributions = CUDNN_REDISTRIBUTIONS | _CUDNN_DIST_DICT,
cudnn_redist_path_prefix = "file:///usr/Downloads/dists/cudnn/"
)
load(
"//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
"nccl_redist_init_repository",
)
_NCCL_WHEEL_DICT = {
"12.4.0": {
"x86_64-unknown-linux-gnu": {
"url": "https://files.pythonhosted.org/packages/38/00/d0d4e48aef772ad5aebcf70b73028f88db6e5640b36c38e90445b7a57c45/nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl",
},
},
}
nccl_redist_init_repository(
cuda_nccl_wheels = CUDA_NCCL_WHEELS | _NCCL_WHEEL_DICT,
)
```
## DEPRECATED: Non-hermetic CUDA/CUDNN usage
Though non-hermetic CUDA/CUDNN usage is deprecated, it may still be needed for
experiments that are currently not officially supported (for example, building
wheels on Windows with CUDA).
Here are the steps to use non-hermetic CUDA installed locally in Google ML
projects:
1. Delete calls to hermetic CUDA repository rules from the `WORKSPACE`
file of the project dependent on XLA.
2. Add the calls to non-hermetic CUDA repository rules to the bottom of the
`WORKSPACE` file.
For XLA and JAX:
```
load("@local_tsl//third_party/gpus:cuda_configure.bzl", "cuda_configure")
cuda_configure(name = "local_config_cuda")
load("@local_tsl//third_party/nccl:nccl_configure.bzl", "nccl_configure")
nccl_configure(name = "local_config_nccl")
```
For Tensorflow:
```
load("@local_tsl//third_party/gpus:cuda_configure.bzl", "cuda_configure")
cuda_configure(name = "local_config_cuda")
load("@local_tsl//third_party/nccl:nccl_configure.bzl", "nccl_configure")
nccl_configure(name = "local_config_nccl")
```
3. Set the following environment variables directly in your shell or in the
`.bazelrc` file as shown below:
```
build:cuda --action_env=TF_CUDA_VERSION=<locally installed cuda version>
build:cuda --action_env=TF_CUDNN_VERSION=<locally installed cudnn version>
build:cuda --action_env=TF_CUDA_COMPUTE_CAPABILITIES=<CUDA compute capabilities>
build:cuda --action_env=LD_LIBRARY_PATH=<CUDA/CUDNN libraries folder locations divided by : sign>
build:cuda --action_env=CUDA_TOOLKIT_PATH=<preinstalled CUDA folder location>
build:cuda --action_env=TF_CUDA_PATHS=<preinstalled CUDA/CUDNN folder locations divided by , sign>
build:cuda --action_env=NCCL_INSTALL_PATH=<preinstalled NCCL library folder location>
```
Note that `TF_CUDA_VERSION` and `TF_CUDNN_VERSION` should consist of major and
minor versions only (e.g. `12.3` for CUDA and `9.1` for CUDNN).
4. Now you can run the `bazel` command to use locally installed CUDA and CUDNN.
For XLA, no changes in the command options are needed.
For JAX, use the `--override_repository=tsl=<tsl_path>` flag in the Bazel
command options.
For Tensorflow, use the `--override_repository=local_tsl=<tsl_path>` flag in
the Bazel command options.
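For example, a sketch of such a JAX build, with `<target>` and `<tsl_path>` as
placeholders:
```sh
# Point Bazel at a local TSL checkout that still contains the non-hermetic
# repository rules.
bazel build --config=cuda <target> \
    --override_repository=tsl=<tsl_path>
```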
## Configure hermetic CUDA
1. In the downstream project dependent on XLA, add the following lines to the
bottom of the `WORKSPACE` file:
Note: use `@local_tsl` instead of `@tsl` in the Tensorflow project.
```
load(
"@local_tsl//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
"cuda_json_init_repository",
)
cuda_json_init_repository()
load(
"@cuda_redist_json//:distributions.bzl",
"CUDA_REDISTRIBUTIONS",
"CUDNN_REDISTRIBUTIONS",
)
load(
"@local_tsl//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
"cuda_redist_init_repositories",
"cudnn_redist_init_repository",
)
cuda_redist_init_repositories(
cuda_redistributions = CUDA_REDISTRIBUTIONS,
)
cudnn_redist_init_repository(
cudnn_redistributions = CUDNN_REDISTRIBUTIONS,
)
load(
"@local_tsl//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
"cuda_configure",
)
cuda_configure(name = "local_config_cuda")
load(
"@local_tsl//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
"nccl_redist_init_repository",
)
nccl_redist_init_repository()
load(
"@local_tsl//third_party/nccl/hermetic:nccl_configure.bzl",
"nccl_configure",
)
nccl_configure(name = "local_config_nccl")
```
2. To select specific versions of hermetic CUDA and CUDNN, set the
`HERMETIC_CUDA_VERSION` and `HERMETIC_CUDNN_VERSION` environment variables
respectively. Use only supported versions. You may set the environment
variables directly in your shell or in the `.bazelrc` file as shown below:
```
build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
build:cuda --repo_env=HERMETIC_CUDNN_VERSION="9.1.1"
build:cuda --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_60,sm_70,sm_80,compute_90"
```
3. To enable hermetic CUDA during test execution, or when running a binary via
Bazel, make sure to add the
`--@local_config_cuda//cuda:include_hermetic_cuda_libs=true` flag to your Bazel
command. You can provide it either directly in a shell or in `.bazelrc`:
```
test:cuda --@local_config_cuda//cuda:include_hermetic_cuda_libs=true
```
The flag is needed to make sure that CUDA dependencies are properly provided
to test executables. The flag is false by default to avoid unwanted coupling
of Google-released Python wheels to CUDA binaries.
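Equivalently, the flag can be passed on the command line for a one-off run:
```sh
# One-off test run that links the hermetic CUDA libraries into the test
# executables instead of relying on a .bazelrc entry.
bazel test --config=cuda <target> \
    --@local_config_cuda//cuda:include_hermetic_cuda_libs=true
```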

View File

@ -219,11 +219,16 @@ build:mkl_aarch64_threadpool -c opt
 build:cuda --repo_env TF_NEED_CUDA=1
 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda --@local_config_cuda//:enable_cuda
+# Default CUDA and CUDNN versions.
+build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
+build:cuda --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29"
+# This flag is needed to include hermetic CUDA libraries for bazel tests.
+test:cuda --@local_config_cuda//cuda:include_hermetic_cuda_libs=true

 # CUDA: This config refers to building CUDA op kernels with clang.
 build:cuda_clang --config=cuda
-build:cuda_clang --action_env=TF_CUDA_CLANG="1"
 build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
-build:cuda_clang --copt=-Qunused-arguments

 # Select supported compute capabilities (supported graphics cards).
 # This is the same as the official TensorFlow builds.
 # See https://developer.nvidia.com/cuda-gpus#compute
@ -232,22 +237,22 @@ build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
 # release while SASS is only forward compatible inside the current
 # major release. Example: sm_80 kernels can run on sm_89 GPUs but
 # not on sm_90 GPUs. compute_80 kernels though can also run on sm_90 GPUs.
-build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
+build:cuda_clang --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
+# Set lld as the linker.
+build:cuda_clang --host_linkopt="-fuse-ld=lld"
+build:cuda_clang --host_linkopt="-lm"
+build:cuda_clang --linkopt="-fuse-ld=lld"
+build:cuda_clang --linkopt="-lm"

 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 build:cuda_clang_official --config=cuda_clang
-build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
-build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
-build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3"
-build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
+build:cuda_clang_official --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
+build:cuda_clang_official --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29"
 build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
-build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 build:cuda_clang_official --crosstool_top="@sigbuild-r2.17-clang_config_cuda//crosstool:toolchain"

 # Build with nvcc for CUDA and clang for host
 build:nvcc_clang --config=cuda
-# Unfortunately, cuda_configure.bzl demands this for using nvcc + clang
-build:nvcc_clang --action_env=TF_CUDA_CLANG="1"
 build:nvcc_clang --action_env=TF_NVCC_CLANG="1"
 build:nvcc_clang --@local_config_cuda//:cuda_compiler=nvcc
@ -543,9 +548,6 @@ build:rbe_linux_cuda --config=cuda_clang_official
 build:rbe_linux_cuda --config=rbe_linux_cpu
 # For Remote build execution -- GPU configuration
 build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
-build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.17-clang_config_cuda"
-build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.17-clang_config_nccl"
-test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"

 build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
 build:rbe_linux_cuda_nvcc --config=nvcc_clang
@ -630,7 +632,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/cla
 # Test-related settings below this point.
 test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
 test:release_linux_base --local_test_jobs=HOST_CPUS
-test:release_linux_base --test_env=LD_LIBRARY_PATH

 # Give only the list of failed tests at the end of the log
 test:release_linux_base --test_summary=short
@ -644,7 +645,6 @@ build:release_gpu_linux --config=release_cpu_linux
 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 # Note that linux cpu and cuda builds share the same toolchain now.
 build:release_gpu_linux --config=cuda_clang_official
-test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"

 # Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think
 test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute

View File

@ -50,3 +50,50 @@ tsl_workspace1()
load(":workspace0.bzl", "tsl_workspace0") load(":workspace0.bzl", "tsl_workspace0")
tsl_workspace0() tsl_workspace0()
load(
"//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
"cuda_json_init_repository",
)
cuda_json_init_repository()
load(
"@cuda_redist_json//:distributions.bzl",
"CUDA_REDISTRIBUTIONS",
"CUDNN_REDISTRIBUTIONS",
)
load(
"//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
"cuda_redist_init_repositories",
"cudnn_redist_init_repository",
)
cuda_redist_init_repositories(
cuda_redistributions = CUDA_REDISTRIBUTIONS,
)
cudnn_redist_init_repository(
cudnn_redistributions = CUDNN_REDISTRIBUTIONS,
)
load(
"//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
"cuda_configure",
)
cuda_configure(name = "local_config_cuda")
load(
"//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
"nccl_redist_init_repository",
)
nccl_redist_init_repository()
load(
"//third_party/nccl/hermetic:nccl_configure.bzl",
"nccl_configure",
)
nccl_configure(name = "local_config_nccl")

View File

@ -21,6 +21,7 @@ third_party/git/BUILD.tpl:
third_party/git/BUILD:
third_party/git/git_configure.bzl:
third_party/gpus/BUILD:
third_party/gpus/compiler_common_tools.bzl:
third_party/gpus/crosstool/BUILD.rocm.tpl:
third_party/gpus/crosstool/BUILD.sycl.tpl:
third_party/gpus/crosstool/BUILD.tpl:
@ -38,6 +39,27 @@ third_party/gpus/cuda/LICENSE:
third_party/gpus/cuda/build_defs.bzl.tpl:
third_party/gpus/cuda/cuda_config.h.tpl:
third_party/gpus/cuda/cuda_config.py.tpl:
third_party/gpus/cuda/hermetic/BUILD.tpl:
third_party/gpus/cuda/hermetic/BUILD:
third_party/gpus/cuda/hermetic/cuda_cccl.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_configure.bzl:
third_party/gpus/cuda/hermetic/cuda_cublas.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_cudart.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_cudnn.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_cudnn9.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_cufft.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_cupti.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_curand.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_cusolver.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_cusparse.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_json_init_repository.bzl:
third_party/gpus/cuda/hermetic/cuda_nvcc.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_nvjitlink.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_nvml.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_nvprune.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_nvrtc.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_nvtx.BUILD.tpl:
third_party/gpus/cuda/hermetic/cuda_redist_init_repositories.bzl:
third_party/gpus/cuda_configure.bzl:
third_party/gpus/find_cuda_config.py:
third_party/gpus/rocm/BUILD.tpl:
@ -67,6 +89,9 @@ third_party/nccl/archive.BUILD:
third_party/nccl/archive.patch:
third_party/nccl/build_defs.bzl.tpl:
third_party/nccl/generated_names.bzl.tpl:
third_party/nccl/hermetic/BUILD:
third_party/nccl/hermetic/cuda_nccl.BUILD.tpl:
third_party/nccl/hermetic/nccl_configure.bzl:
third_party/nccl/nccl_configure.bzl:
third_party/nccl/system.BUILD.tpl:
third_party/nvtx/BUILD:

View File

@ -14,6 +14,9 @@
# ==============================================================================
"""Verifies that a list of libraries is installed on the system.
NB: DEPRECATED! This script is a part of the deprecated `cuda_configure` rule.
Please use `hermetic/cuda_configure` instead.
Takes a list of arguments with every two subsequent arguments being a logical
tuple of (path, check_soname). The path to the library and either True or False
to indicate whether to check the soname field on the shared library.

View File

@ -0,0 +1,174 @@
"""Common compiler functions. """
load(
"//third_party/remote_config:common.bzl",
"err_out",
"raw_exec",
"realpath",
)
def to_list_of_strings(elements):
"""Convert the list of ["a", "b", "c"] into '"a", "b", "c"'.
This is to be used to put a list of strings into the bzl file templates
so it gets interpreted as list of strings in Starlark.
Args:
elements: list of string elements
Returns:
single string of elements wrapped in quotes separated by a comma."""
quoted_strings = ["\"" + element + "\"" for element in elements]
return ", ".join(quoted_strings)
_INC_DIR_MARKER_BEGIN = "#include <...>"
# OSX add " (framework directory)" at the end of line, strip it.
_OSX_FRAMEWORK_SUFFIX = " (framework directory)"
_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)
# TODO(dzc): Once these functions have been factored out of Bazel's
# cc_configure.bzl, load them from @bazel_tools instead.
def _cxx_inc_convert(path):
"""Convert path returned by cc -E xc++ in a complete path."""
path = path.strip()
if path.endswith(_OSX_FRAMEWORK_SUFFIX):
path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
return path
def _normalize_include_path(repository_ctx, path):
"""Normalizes include paths before writing them to the crosstool.
If path points inside the 'crosstool' folder of the repository, a relative
path is returned.
If path points outside the 'crosstool' folder, an absolute path is returned.
"""
path = str(repository_ctx.path(path))
crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
if path.startswith(crosstool_folder):
# We drop the path to "$REPO/crosstool" and a trailing path separator.
return path[len(crosstool_folder) + 1:]
return path
def _is_compiler_option_supported(repository_ctx, cc, option):
"""Checks that `option` is supported by the C compiler. Doesn't %-escape the option."""
result = repository_ctx.execute([
cc,
option,
"-o",
"/dev/null",
"-c",
str(repository_ctx.path("tools/cpp/empty.cc")),
])
return result.stderr.find(option) == -1
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sys_root):
"""Compute the list of default C or C++ include directories."""
if lang_is_cpp:
lang = "c++"
else:
lang = "c"
sysroot = []
if tf_sys_root:
sysroot += ["--sysroot", tf_sys_root]
result = raw_exec(repository_ctx, [cc, "-E", "-x" + lang, "-", "-v"] +
sysroot)
stderr = err_out(result)
index1 = stderr.find(_INC_DIR_MARKER_BEGIN)
if index1 == -1:
return []
index1 = stderr.find("\n", index1)
if index1 == -1:
return []
index2 = stderr.rfind("\n ")
if index2 == -1 or index2 < index1:
return []
index2 = stderr.find("\n", index2 + 1)
if index2 == -1:
inc_dirs = stderr[index1 + 1:]
else:
inc_dirs = stderr[index1 + 1:index2].strip()
print_resource_dir_supported = _is_compiler_option_supported(
repository_ctx,
cc,
"-print-resource-dir",
)
if print_resource_dir_supported:
resource_dir = repository_ctx.execute(
[cc, "-print-resource-dir"],
).stdout.strip() + "/share"
inc_dirs += "\n" + resource_dir
compiler_includes = [
_normalize_include_path(repository_ctx, _cxx_inc_convert(p))
for p in inc_dirs.split("\n")
]
# The compiler might be on a symlink, e.g. /symlink -> /opt/gcc
# The above keeps only the resolved paths to the default includes (e.g. /opt/gcc/include/c++/11)
# but Bazel might encounter either (usually reported by the compiler)
# especially when a compiler wrapper (e.g. ccache) is used.
# So we need to also include paths where symlinks are not resolved.
# Try to find real path to CC installation to "see through" compiler wrappers
# GCC has the path to g++
index1 = result.stderr.find("COLLECT_GCC=")
if index1 != -1:
index1 = result.stderr.find("=", index1)
index2 = result.stderr.find("\n", index1)
cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname.dirname
else:
# Clang has the directory
index1 = result.stderr.find("InstalledDir: ")
if index1 != -1:
index1 = result.stderr.find(" ", index1)
index2 = result.stderr.find("\n", index1)
cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname
else:
# Fallback to the CC path
cc_topdir = repository_ctx.path(cc).dirname.dirname
# We now have the compiler installation prefix, e.g. /symlink/gcc
# And the resolved installation prefix, e.g. /opt/gcc
cc_topdir_resolved = str(realpath(repository_ctx, cc_topdir)).strip()
cc_topdir = str(cc_topdir).strip()
# If there is (any!) symlink involved we add paths where the unresolved installation prefix is kept.
# e.g. [/opt/gcc/include/c++/11, /opt/gcc/lib/gcc/x86_64-linux-gnu/11/include, /other/path]
# adds [/symlink/include/c++/11, /symlink/lib/gcc/x86_64-linux-gnu/11/include]
if cc_topdir_resolved != cc_topdir:
unresolved_compiler_includes = [
cc_topdir + inc[len(cc_topdir_resolved):]
for inc in compiler_includes
if inc.startswith(cc_topdir_resolved)
]
compiler_includes = compiler_includes + unresolved_compiler_includes
return compiler_includes
def get_cxx_inc_directories(repository_ctx, cc, tf_sys_root):
"""Compute the list of default C and C++ include directories."""
# For some reason `clang -xc` sometimes returns include paths that are
# different from the ones from `clang -xc++`. (Symlink and a dir)
# So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
includes_cpp = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
True,
tf_sys_root,
)
includes_c = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
False,
tf_sys_root,
)
return includes_cpp + [
inc
for inc in includes_c
if inc not in includes_cpp
]

View File

@ -2,6 +2,7 @@
 # Update cuda_configure.bzl#verify_build_defines when adding new variables.
 load(":cc_toolchain_config.bzl", "cc_toolchain_config")
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")

 licenses(["restricted"])

@ -133,9 +134,17 @@ filegroup(
     srcs = [],
 )

+filegroup(
+    name = "cuda_nvcc_files",
+    srcs = %{cuda_nvcc_files},
+)
+
 filegroup(
     name = "crosstool_wrapper_driver_is_not_gcc",
-    srcs = ["clang/bin/crosstool_wrapper_driver_is_not_gcc"],
+    srcs = [
+        ":cuda_nvcc_files",
+        ":clang/bin/crosstool_wrapper_driver_is_not_gcc"
+    ],
 )

 filegroup(

View File

@ -1,6 +1,10 @@
+# NB: DEPRECATED! This file is a part of the deprecated `cuda_configure` rule.
+# Please use `hermetic/cuda_configure` instead.
 load(":build_defs.bzl", "cuda_header_library")
 load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
 load("@bazel_skylib//lib:selects.bzl", "selects")
+load("@bazel_skylib//rules:common_settings.bzl", "bool_flag", "bool_setting")

 licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like

@ -144,7 +148,6 @@ cc_library(
     name = "cusolver",
     srcs = ["cuda/lib/%{cusolver_lib}"],
     data = ["cuda/lib/%{cusolver_lib}"],
-    linkopts = ["-lgomp"],
     linkstatic = 1,
 )

@ -220,7 +223,6 @@ cc_library(
     name = "cusparse",
     srcs = ["cuda/lib/%{cusparse_lib}"],
     data = ["cuda/lib/%{cusparse_lib}"],
-    linkopts = ["-lgomp"],
     linkstatic = 1,
 )

@ -242,6 +244,41 @@ py_library(
     srcs = ["cuda/cuda_config.py"],
 )

+# Build setting that is always true (i.e. it can not be changed on the
+# command line). It is used to create the config settings below that are
+# always or never satisfied.
+bool_setting(
+    name = "true_setting",
+    visibility = ["//visibility:private"],
+    build_setting_default = True,
+)
+
+# Config settings whether TensorFlow is built with hermetic CUDA.
+# These configs are never satisfied.
+config_setting(
+    name = "hermetic_cuda_tools",
+    flag_values = {":true_setting": "False"},
+)
+
+# Flag indicating if we should include hermetic CUDA libs.
+bool_flag(
+    name = "include_hermetic_cuda_libs",
+    build_setting_default = False,
+)
+
+config_setting(
+    name = "hermetic_cuda_libs",
+    flag_values = {":true_setting": "False"},
+)
+
+selects.config_setting_group(
+    name = "hermetic_cuda_tools_and_libs",
+    match_all = [
+        ":hermetic_cuda_libs",
+        ":hermetic_cuda_tools"
+    ],
+)
+
 %{copy_rules}

 cc_library(

View File

@ -1,3 +1,7 @@
# NB: DEPRECATED! This file is a part of the deprecated `cuda_configure` rule.
# Hermetic CUDA repository rule doesn't support Windows.
# Please use `hermetic/cuda_configure`.
load(":build_defs.bzl", "cuda_header_library") load(":build_defs.bzl", "cuda_header_library")
load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
load("@bazel_skylib//lib:selects.bzl", "selects") load("@bazel_skylib//lib:selects.bzl", "selects")

View File

@ -0,0 +1,261 @@
load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
load("@bazel_skylib//lib:selects.bzl", "selects")
load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
package(default_visibility = ["//visibility:public"])
# Config setting whether TensorFlow is built with CUDA support using clang.
#
# TODO(b/174244321), DEPRECATED: this target will be removed when all users
# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_clang.
selects.config_setting_group(
name = "using_clang",
match_all = [
"@local_config_cuda//:is_cuda_enabled",
"@local_config_cuda//:is_cuda_compiler_clang",
],
)
# Config setting whether TensorFlow is built with CUDA support using nvcc.
#
# TODO(b/174244321), DEPRECATED: this target will be removed when all users
# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_nvcc.
selects.config_setting_group(
name = "using_nvcc",
match_all = [
"@local_config_cuda//:is_cuda_enabled",
"@local_config_cuda//:is_cuda_compiler_nvcc",
],
)
# Equivalent to using_clang && -c opt.
selects.config_setting_group(
name = "using_clang_opt",
match_all = [
":using_clang",
":_opt",
],
)
config_setting(
name = "_opt",
values = {"compilation_mode": "opt"},
)
# Provides CUDA headers for '#include "third_party/gpus/cuda/include/cuda.h"'
# All clients including TensorFlow should use these directives.
cc_library(
name = "cuda_headers",
hdrs = [
"cuda/cuda_config.h",
],
include_prefix = "third_party/gpus",
includes = [
".", # required to include cuda/cuda/cuda_config.h as cuda/config.h
],
deps = [":cudart_headers",
":cublas_headers",
":cccl_headers",
":nvtx_headers",
":nvcc_headers",
":cusolver_headers",
":cufft_headers",
":cusparse_headers",
":curand_headers",
":cupti_headers",
":nvml_headers"],
)
cc_library(
name = "cudart_static",
srcs = ["@cuda_cudart//:static"],
linkopts = [
"-ldl",
"-lpthread",
%{cudart_static_linkopt}
],
)
alias(
name = "cuda_driver",
actual = "@cuda_cudart//:cuda_driver",
)
alias(
name = "cudart_headers",
actual = "@cuda_cudart//:headers",
)
alias(
name = "cudart",
actual = "@cuda_cudart//:cudart",
)
alias(
name = "nvtx_headers",
actual = "@cuda_nvtx//:headers",
)
alias(
name = "nvml_headers",
actual = "@cuda_nvml//:headers",
)
alias(
name = "nvcc_headers",
actual = "@cuda_nvcc//:headers",
)
alias(
name = "cccl_headers",
actual = "@cuda_cccl//:headers",
)
alias(
name = "cublas_headers",
actual = "@cuda_cublas//:headers",
)
alias(
name = "cusolver_headers",
actual = "@cuda_cusolver//:headers",
)
alias(
name = "cufft_headers",
actual = "@cuda_cufft//:headers",
)
alias(
name = "cusparse_headers",
actual = "@cuda_cusparse//:headers",
)
alias(
name = "curand_headers",
actual = "@cuda_curand//:headers",
)
alias(
name = "cublas",
actual = "@cuda_cublas//:cublas",
)
alias(
name = "cublasLt",
actual = "@cuda_cublas//:cublasLt",
)
alias(
name = "cusolver",
actual = "@cuda_cusolver//:cusolver",
)
alias(
name = "cudnn",
actual = "@cuda_cudnn//:cudnn",
)
alias(
name = "cudnn_header",
actual = "@cuda_cudnn//:headers",
)
alias(
name = "cufft",
actual = "@cuda_cufft//:cufft",
)
alias(
name = "curand",
actual = "@cuda_curand//:curand",
)
cc_library(
name = "cuda",
deps = [
":cublas",
":cublasLt",
":cuda_headers",
":cudart",
":cudnn",
":cufft",
":curand",
],
)
alias(
name = "cub_headers",
actual = ":cuda_headers",
)
alias(
name = "cupti_headers",
actual = "@cuda_cupti//:headers",
)
alias(
name = "cupti_dsos",
actual = "@cuda_cupti//:cupti",
)
alias(
name = "cusparse",
actual = "@cuda_cusparse//:cusparse",
)
alias(
name = "cuda-nvvm",
actual = "@cuda_nvcc//:nvvm",
)
cc_library(
name = "libdevice_root",
data = [":cuda-nvvm"],
)
bzl_library(
name = "build_defs_bzl",
srcs = ["build_defs.bzl"],
deps = [
"@bazel_skylib//lib:selects",
],
)
py_library(
name = "cuda_config_py",
srcs = ["cuda/cuda_config.py"],
)
# Config setting whether TensorFlow is built with hermetic CUDA.
alias(
name = "hermetic_cuda_tools",
actual = "@local_config_cuda//:is_cuda_enabled",
)
# Flag indicating if we should include hermetic CUDA libs.
bool_flag(
name = "include_hermetic_cuda_libs",
build_setting_default = False,
)
config_setting(
name = "hermetic_cuda_libs",
flag_values = {":include_hermetic_cuda_libs": "True"},
)
selects.config_setting_group(
name = "hermetic_cuda_tools_and_libs",
match_all = [
":hermetic_cuda_libs",
":hermetic_cuda_tools"
],
)
cc_library(
# This is not yet fully supported, but we need the rule
# to make bazel query happy.
name = "nvptxcompiler",
)

View File

@ -0,0 +1,15 @@
licenses(["restricted"]) # NVIDIA proprietary license
cc_library(
name = "headers",
hdrs = glob([
%{comment}"include/cub/**",
%{comment}"include/cuda/**",
%{comment}"include/nv/**",
%{comment}"include/thrust/**",
]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)

View File

@ -0,0 +1,521 @@
"""Repository rule for hermetic CUDA autoconfiguration.
`cuda_configure` depends on the following environment variables:
* `TF_NEED_CUDA`: Whether to enable building with CUDA.
* `TF_NVCC_CLANG`: Whether to use clang for C++ and NVCC for Cuda compilation.
* `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for
both host and device code compilation.
* `TF_SYSROOT`: The sysroot to use when compiling.
* `HERMETIC_CUDA_VERSION`: The version of the CUDA toolkit. If not specified,
the version will be determined by the `TF_CUDA_VERSION`.
* `HERMETIC_CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default
is `3.5,5.2`. If not specified, the value will be determined by the
`TF_CUDA_COMPUTE_CAPABILITIES`.
* `PYTHON_BIN_PATH`: The python binary path
"""
load(
"//third_party/gpus:compiler_common_tools.bzl",
"get_cxx_inc_directories",
"to_list_of_strings",
)
load(
"//third_party/remote_config:common.bzl",
"get_cpu_value",
"get_host_environ",
"which",
)
def _find_cc(repository_ctx):
"""Find the C++ compiler."""
cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
cc_name = "clang"
cc_name_from_env = get_host_environ(repository_ctx, cc_path_envvar)
if cc_name_from_env:
cc_name = cc_name_from_env
if cc_name.startswith("/"):
# Return the absolute path.
return cc_name
cc = which(repository_ctx, cc_name)
if cc == None:
fail(("Cannot find {}, either correct your path or set the {}" +
" environment variable").format(cc_name, cc_path_envvar))
return cc
def _auto_configure_fail(msg):
"""Output failure message when cuda configuration fails."""
red = "\033[0;31m"
no_color = "\033[0m"
fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
def _verify_build_defines(params):
"""Verify all variables that crosstool/BUILD.tpl expects are substituted.
Args:
params: dict of variables that will be passed to the BUILD.tpl template.
"""
missing = []
for param in [
"cxx_builtin_include_directories",
"extra_no_canonical_prefixes_flags",
"host_compiler_path",
"host_compiler_prefix",
"host_compiler_warnings",
"linker_bin_path",
"compiler_deps",
"msvc_cl_path",
"msvc_env_include",
"msvc_env_lib",
"msvc_env_path",
"msvc_env_tmp",
"msvc_lib_path",
"msvc_link_path",
"msvc_ml_path",
"unfiltered_compile_flags",
"win_compiler_deps",
]:
if ("%{" + param + "}") not in params:
missing.append(param)
if missing:
_auto_configure_fail(
"BUILD.tpl template is missing these variables: " +
str(missing) +
".\nWe only got: " +
str(params) +
".",
)
def get_cuda_version(repository_ctx):
return (get_host_environ(repository_ctx, HERMETIC_CUDA_VERSION) or
get_host_environ(repository_ctx, TF_CUDA_VERSION))
def enable_cuda(repository_ctx):
"""Returns whether to build with CUDA support."""
return int(get_host_environ(repository_ctx, TF_NEED_CUDA, False))
def _flag_enabled(repository_ctx, flag_name):
return get_host_environ(repository_ctx, flag_name) == "1"
def _use_nvcc_and_clang(repository_ctx):
# Returns the flag if we need to use clang for C++ and NVCC for Cuda.
return _flag_enabled(repository_ctx, _TF_NVCC_CLANG)
def _tf_sysroot(repository_ctx):
return get_host_environ(repository_ctx, _TF_SYSROOT, "")
def _py_tmpl_dict(d):
return {"%{cuda_config}": str(d)}
def _cudart_static_linkopt(cpu_value):
"""Returns additional platform-specific linkopts for cudart."""
return "\"\"," if cpu_value == "Darwin" else "\"-lrt\","
def _compute_capabilities(repository_ctx):
"""Returns a list of strings representing cuda compute capabilities.
Args:
repository_ctx: the repo rule's context.
Returns:
list of cuda architectures to compile for. 'compute_xy' refers to
both PTX and SASS, 'sm_xy' refers to SASS only.
"""
capabilities = (get_host_environ(
repository_ctx,
_HERMETIC_CUDA_COMPUTE_CAPABILITIES,
) or
get_host_environ(
repository_ctx,
_TF_CUDA_COMPUTE_CAPABILITIES,
))
capabilities = (capabilities or "compute_35,compute_52").split(",")
# Map old 'x.y' capabilities to 'compute_xy'.
if len(capabilities) > 0 and all([len(x.split(".")) == 2 for x in capabilities]):
# If all capabilities are in 'x.y' format, only include PTX for the
# highest capability.
cc_list = sorted([x.replace(".", "") for x in capabilities])
capabilities = [
"sm_%s" % x
for x in cc_list[:-1]
] + ["compute_%s" % cc_list[-1]]
for i, capability in enumerate(capabilities):
parts = capability.split(".")
if len(parts) != 2:
continue
capabilities[i] = "compute_%s%s" % (parts[0], parts[1])
# Make list unique
capabilities = dict(zip(capabilities, capabilities)).keys()
# Validate capabilities.
for capability in capabilities:
if not capability.startswith(("compute_", "sm_")):
_auto_configure_fail("Invalid compute capability: %s" % capability)
for prefix in ["compute_", "sm_"]:
if not capability.startswith(prefix):
continue
if len(capability) == len(prefix) + 2 and capability[-2:].isdigit():
continue
if len(capability) == len(prefix) + 3 and capability.endswith("90a"):
continue
_auto_configure_fail("Invalid compute capability: %s" % capability)
return capabilities
def _compute_cuda_extra_copts(compute_capabilities):
copts = ["--no-cuda-include-ptx=all"]
for capability in compute_capabilities:
if capability.startswith("compute_"):
capability = capability.replace("compute_", "sm_")
copts.append("--cuda-include-ptx=%s" % capability)
copts.append("--cuda-gpu-arch=%s" % capability)
return str(copts)
def _get_cuda_config(repository_ctx):
"""Detects and returns information about the CUDA installation on the system.
Args:
repository_ctx: The repository context.
Returns:
A struct containing the following fields:
cuda_version: The version of CUDA on the system.
cudart_version: The CUDA runtime version on the system.
cudnn_version: The version of cuDNN on the system.
compute_capabilities: A list of the system's CUDA compute capabilities.
cpu_value: The name of the host operating system.
"""
return struct(
cuda_version = get_cuda_version(repository_ctx),
cupti_version = repository_ctx.read(repository_ctx.attr.cupti_version),
cudart_version = repository_ctx.read(repository_ctx.attr.cudart_version),
cublas_version = repository_ctx.read(repository_ctx.attr.cublas_version),
cusolver_version = repository_ctx.read(repository_ctx.attr.cusolver_version),
curand_version = repository_ctx.read(repository_ctx.attr.curand_version),
cufft_version = repository_ctx.read(repository_ctx.attr.cufft_version),
cusparse_version = repository_ctx.read(repository_ctx.attr.cusparse_version),
cudnn_version = repository_ctx.read(repository_ctx.attr.cudnn_version),
compute_capabilities = _compute_capabilities(repository_ctx),
cpu_value = get_cpu_value(repository_ctx),
)
_DUMMY_CROSSTOOL_BZL_FILE = """
def error_gpu_disabled():
fail("ERROR: Building with --config=cuda but TensorFlow is not configured " +
"to build with GPU support. Please re-run ./configure and enter 'Y' " +
"at the prompt to build with GPU support.")
native.genrule(
name = "error_gen_crosstool",
outs = ["CROSSTOOL"],
cmd = "echo 'Should not be run.' && exit 1",
)
native.filegroup(
name = "crosstool",
srcs = [":CROSSTOOL"],
output_licenses = ["unencumbered"],
)
"""
_DUMMY_CROSSTOOL_BUILD_FILE = """
load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
error_gpu_disabled()
"""
def _create_dummy_repository(repository_ctx):
cpu_value = get_cpu_value(repository_ctx)
# Set up BUILD file for cuda/.
repository_ctx.template(
"cuda/build_defs.bzl",
repository_ctx.attr.build_defs_tpl,
{
"%{cuda_is_configured}": "False",
"%{cuda_extra_copts}": "[]",
"%{cuda_gpu_architectures}": "[]",
"%{cuda_version}": "0.0",
},
)
repository_ctx.template(
"cuda/BUILD",
repository_ctx.attr.cuda_build_tpl,
{
"%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value),
},
)
# Set up cuda_config.h, which is used by
# tensorflow/compiler/xla/stream_executor/dso_loader.cc.
repository_ctx.template(
"cuda/cuda/cuda_config.h",
repository_ctx.attr.cuda_config_tpl,
{
"%{cuda_version}": "",
"%{cudart_version}": "",
"%{cupti_version}": "",
"%{cublas_version}": "",
"%{cusolver_version}": "",
"%{curand_version}": "",
"%{cufft_version}": "",
"%{cusparse_version}": "",
"%{cudnn_version}": "",
"%{cuda_toolkit_path}": "",
"%{cuda_compute_capabilities}": "",
},
)
# Set up cuda_config.py, which is used by gen_build_info to provide
# static build environment info to the API
repository_ctx.template(
"cuda/cuda/cuda_config.py",
repository_ctx.attr.cuda_config_py_tpl,
_py_tmpl_dict({}),
)
# If cuda_configure is not configured to build with GPU support, and the user
# attempts to build with --config=cuda, add a dummy build rule to intercept
# this and fail with an actionable error message.
repository_ctx.file(
"crosstool/error_gpu_disabled.bzl",
_DUMMY_CROSSTOOL_BZL_FILE,
)
repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
def _create_local_cuda_repository(repository_ctx):
"""Creates the repository containing files set up to build with CUDA."""
cuda_config = _get_cuda_config(repository_ctx)
# Set up BUILD file for cuda/
repository_ctx.template(
"cuda/build_defs.bzl",
repository_ctx.attr.build_defs_tpl,
{
"%{cuda_is_configured}": "True",
"%{cuda_extra_copts}": _compute_cuda_extra_copts(
cuda_config.compute_capabilities,
),
"%{cuda_gpu_architectures}": str(cuda_config.compute_capabilities),
"%{cuda_version}": cuda_config.cuda_version,
},
)
repository_ctx.template(
"cuda/BUILD",
repository_ctx.attr.cuda_build_tpl,
{
"%{cudart_static_linkopt}": _cudart_static_linkopt(
cuda_config.cpu_value,
),
},
)
is_nvcc_and_clang = _use_nvcc_and_clang(repository_ctx)
tf_sysroot = _tf_sysroot(repository_ctx)
# Set up crosstool/
cc = _find_cc(repository_ctx)
host_compiler_includes = get_cxx_inc_directories(
repository_ctx,
cc,
tf_sysroot,
)
cuda_defines = {}
# We do not support hermetic CUDA on Windows.
# This ensures the CROSSTOOL file parser is happy.
cuda_defines.update({
"%{msvc_env_tmp}": "msvc_not_used",
"%{msvc_env_path}": "msvc_not_used",
"%{msvc_env_include}": "msvc_not_used",
"%{msvc_env_lib}": "msvc_not_used",
"%{msvc_cl_path}": "msvc_not_used",
"%{msvc_ml_path}": "msvc_not_used",
"%{msvc_link_path}": "msvc_not_used",
"%{msvc_lib_path}": "msvc_not_used",
"%{win_compiler_deps}": ":empty",
})
cuda_defines["%{builtin_sysroot}"] = tf_sysroot
cuda_defines["%{cuda_toolkit_path}"] = repository_ctx.attr.nvcc_binary.workspace_root
cuda_defines["%{compiler}"] = "clang"
cuda_defines["%{host_compiler_prefix}"] = "/usr/bin"
cuda_defines["%{linker_bin_path}"] = ""
cuda_defines["%{extra_no_canonical_prefixes_flags}"] = ""
cuda_defines["%{unfiltered_compile_flags}"] = ""
cuda_defines["%{cxx_builtin_include_directories}"] = to_list_of_strings(
host_compiler_includes,
)
cuda_defines["%{cuda_nvcc_files}"] = "if_cuda([\"@{nvcc_archive}//:bin\", \"@{nvcc_archive}//:nvvm\"])".format(
nvcc_archive = repository_ctx.attr.nvcc_binary.repo_name,
)
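# E.g. with the default @cuda_nvcc//:bin/nvcc binary this renders as
# if_cuda(["@cuda_nvcc//:bin", "@cuda_nvcc//:nvvm"]) (assuming
# WORKSPACE-style repository names).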
if not is_nvcc_and_clang:
cuda_defines["%{host_compiler_path}"] = str(cc)
cuda_defines["%{host_compiler_warnings}"] = """
# Some parts of the codebase set -Werror and hit this warning, so
# switch it off for now.
"-Wno-invalid-partial-specialization"
"""
cuda_defines["%{compiler_deps}"] = ":cuda_nvcc_files"
repository_ctx.file(
"crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
"",
)
else:
cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
cuda_defines["%{host_compiler_warnings}"] = ""
nvcc_relative_path = "%s/%s" % (
repository_ctx.attr.nvcc_binary.workspace_root,
repository_ctx.attr.nvcc_binary.name,
)
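# E.g. "external/cuda_nvcc/bin/nvcc" for the default @cuda_nvcc//:bin/nvcc
# binary (path shown is illustrative of WORKSPACE-style layouts).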
cuda_defines["%{compiler_deps}"] = ":crosstool_wrapper_driver_is_not_gcc"
wrapper_defines = {
"%{cpu_compiler}": str(cc),
"%{cuda_version}": cuda_config.cuda_version,
"%{nvcc_path}": nvcc_relative_path,
"%{host_compiler_path}": str(cc),
"%{use_clang_compiler}": "True",
}
repository_ctx.template(
"crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
repository_ctx.attr.crosstool_wrapper_driver_is_not_gcc_tpl,
wrapper_defines,
)
_verify_build_defines(cuda_defines)
# Only expand template variables in the BUILD file
repository_ctx.template(
"crosstool/BUILD",
repository_ctx.attr.crosstool_build_tpl,
cuda_defines,
)
# No templating of cc_toolchain_config - use attributes and templatize the
# BUILD file.
repository_ctx.template(
"crosstool/cc_toolchain_config.bzl",
repository_ctx.attr.cc_toolchain_config_tpl,
{},
)
# Set up cuda_config.h, which is used by
# tensorflow/compiler/xla/stream_executor/dso_loader.cc.
repository_ctx.template(
"cuda/cuda/cuda_config.h",
repository_ctx.attr.cuda_config_tpl,
{
"%{cuda_version}": cuda_config.cuda_version,
"%{cudart_version}": cuda_config.cudart_version,
"%{cupti_version}": cuda_config.cupti_version,
"%{cublas_version}": cuda_config.cublas_version,
"%{cusolver_version}": cuda_config.cusolver_version,
"%{curand_version}": cuda_config.curand_version,
"%{cufft_version}": cuda_config.cufft_version,
"%{cusparse_version}": cuda_config.cusparse_version,
"%{cudnn_version}": cuda_config.cudnn_version,
"%{cuda_toolkit_path}": "",
"%{cuda_compute_capabilities}": ", ".join([
cc.split("_")[1]
for cc in cuda_config.compute_capabilities
]),
},
)
# Set up cuda_config.py, which is used by gen_build_info to provide
# static build environment info to the API
repository_ctx.template(
"cuda/cuda/cuda_config.py",
repository_ctx.attr.cuda_config_py_tpl,
_py_tmpl_dict({
"cuda_version": cuda_config.cuda_version,
"cudnn_version": cuda_config.cudnn_version,
"cuda_compute_capabilities": cuda_config.compute_capabilities,
"cpu_compiler": str(cc),
}),
)
def _cuda_autoconf_impl(repository_ctx):
"""Implementation of the cuda_autoconf repository rule."""
build_file = repository_ctx.attr.local_config_cuda_build_file
if not enable_cuda(repository_ctx):
_create_dummy_repository(repository_ctx)
else:
_create_local_cuda_repository(repository_ctx)
repository_ctx.symlink(build_file, "BUILD")
_CLANG_CUDA_COMPILER_PATH = "CLANG_CUDA_COMPILER_PATH"
_PYTHON_BIN_PATH = "PYTHON_BIN_PATH"
_HERMETIC_CUDA_COMPUTE_CAPABILITIES = "HERMETIC_CUDA_COMPUTE_CAPABILITIES"
_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
HERMETIC_CUDA_VERSION = "HERMETIC_CUDA_VERSION"
TF_CUDA_VERSION = "TF_CUDA_VERSION"
TF_NEED_CUDA = "TF_NEED_CUDA"
_TF_NVCC_CLANG = "TF_NVCC_CLANG"
_TF_SYSROOT = "TF_SYSROOT"
_ENVIRONS = [
_CLANG_CUDA_COMPILER_PATH,
TF_NEED_CUDA,
_TF_NVCC_CLANG,
TF_CUDA_VERSION,
HERMETIC_CUDA_VERSION,
_TF_CUDA_COMPUTE_CAPABILITIES,
_HERMETIC_CUDA_COMPUTE_CAPABILITIES,
_TF_SYSROOT,
_PYTHON_BIN_PATH,
"TMP",
"TMPDIR",
"LOCAL_CUDA_PATH",
"LOCAL_CUDNN_PATH",
]
cuda_configure = repository_rule(
implementation = _cuda_autoconf_impl,
environ = _ENVIRONS,
attrs = {
"environ": attr.string_dict(),
"cublas_version": attr.label(default = Label("@cuda_cublas//:version.txt")),
"cudart_version": attr.label(default = Label("@cuda_cudart//:version.txt")),
"cudnn_version": attr.label(default = Label("@cuda_cudnn//:version.txt")),
"cufft_version": attr.label(default = Label("@cuda_cufft//:version.txt")),
"cupti_version": attr.label(default = Label("@cuda_cupti//:version.txt")),
"curand_version": attr.label(default = Label("@cuda_curand//:version.txt")),
"cusolver_version": attr.label(default = Label("@cuda_cusolver//:version.txt")),
"cusparse_version": attr.label(default = Label("@cuda_cusparse//:version.txt")),
"nvcc_binary": attr.label(default = Label("@cuda_nvcc//:bin/nvcc")),
"local_config_cuda_build_file": attr.label(default = Label("//third_party/gpus:local_config_cuda.BUILD")),
"build_defs_tpl": attr.label(default = Label("//third_party/gpus/cuda:build_defs.bzl.tpl")),
"cuda_build_tpl": attr.label(default = Label("//third_party/gpus/cuda/hermetic:BUILD.tpl")),
"cuda_config_tpl": attr.label(default = Label("//third_party/gpus/cuda:cuda_config.h.tpl")),
"cuda_config_py_tpl": attr.label(default = Label("//third_party/gpus/cuda:cuda_config.py.tpl")),
"crosstool_wrapper_driver_is_not_gcc_tpl": attr.label(default = Label("//third_party/gpus/crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl")),
"crosstool_build_tpl": attr.label(default = Label("//third_party/gpus/crosstool:BUILD.tpl")),
"cc_toolchain_config_tpl": attr.label(default = Label("//third_party/gpus/crosstool:cc_toolchain_config.bzl.tpl")),
},
)
"""Detects and configures the hermetic CUDA toolchain.
Add the following to your WORKSPACE file:
```python
cuda_configure(name = "local_config_cuda")
```
Args:
name: A unique name for this workspace rule.
""" # buildifier: disable=no-effect


@ -0,0 +1,44 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cublas_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcublas.so.%{libcublas_version}",
deps = [":cublasLt"],
)
cc_import(
name = "cublasLt_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcublasLt.so.%{libcublaslt_version}",
)
%{multiline_comment}
cc_library(
name = "cublas",
visibility = ["//visibility:public"],
%{comment}deps = [":cublas_shared_library"],
)
cc_library(
name = "cublasLt",
visibility = ["//visibility:public"],
%{comment}deps = [":cublasLt_shared_library"],
)
cc_library(
name = "headers",
%{comment}hdrs = [
%{comment}"include/cublas.h",
%{comment}"include/cublasLt.h",
%{comment}"include/cublas_api.h",
%{comment}"include/cublas_v2.h",
%{comment}],
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,126 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
filegroup(
name = "static",
srcs = ["lib/libcudart_static.a"],
visibility = ["@local_config_cuda//cuda:__pkg__"],
)
%{multiline_comment}
# TODO: Replace system provided library with hermetic NVIDIA driver library.
cc_import(
name = "cuda_driver_shared_library",
interface_library = "lib/stubs/libcuda.so",
system_provided = 1,
)
cc_import(
name = "cudart_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcudart.so.%{libcudart_version}",
)
%{multiline_comment}
cc_library(
name = "cuda_driver",
%{comment}deps = [":cuda_driver_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "cudart",
%{comment}deps = [
%{comment}":cuda_driver",
%{comment}":cudart_shared_library",
%{comment}],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/builtin_types.h",
%{comment}"include/channel_descriptor.h",
%{comment}"include/common_functions.h",
%{comment}"include/cooperative_groups/**",
%{comment}"include/cooperative_groups.h",
%{comment}"include/cuComplex.h",
%{comment}"include/cuda.h",
%{comment}"include/cudaEGL.h",
%{comment}"include/cudaEGLTypedefs.h",
%{comment}"include/cudaGL.h",
%{comment}"include/cudaGLTypedefs.h",
%{comment}"include/cudaProfilerTypedefs.h",
%{comment}"include/cudaTypedefs.h",
%{comment}"include/cudaVDPAU.h",
%{comment}"include/cudaVDPAUTypedefs.h",
%{comment}"include/cuda_awbarrier.h",
%{comment}"include/cuda_awbarrier_helpers.h",
%{comment}"include/cuda_awbarrier_primitives.h",
%{comment}"include/cuda_bf16.h",
%{comment}"include/cuda_bf16.hpp",
%{comment}"include/cuda_device_runtime_api.h",
%{comment}"include/cuda_egl_interop.h",
%{comment}"include/cuda_fp16.h",
%{comment}"include/cuda_fp16.hpp",
%{comment}"include/cuda_fp8.h",
%{comment}"include/cuda_fp8.hpp",
%{comment}"include/cuda_gl_interop.h",
%{comment}"include/cuda_occupancy.h",
%{comment}"include/cuda_pipeline.h",
%{comment}"include/cuda_pipeline_helpers.h",
%{comment}"include/cuda_pipeline_primitives.h",
%{comment}"include/cuda_runtime.h",
%{comment}"include/cuda_runtime_api.h",
%{comment}"include/cuda_surface_types.h",
%{comment}"include/cuda_texture_types.h",
%{comment}"include/cuda_vdpau_interop.h",
%{comment}"include/cudart_platform.h",
%{comment}"include/device_atomic_functions.h",
%{comment}"include/device_atomic_functions.hpp",
%{comment}"include/device_double_functions.h",
%{comment}"include/device_functions.h",
%{comment}"include/device_launch_parameters.h",
%{comment}"include/device_types.h",
%{comment}"include/driver_functions.h",
%{comment}"include/driver_types.h",
%{comment}"include/host_config.h",
%{comment}"include/host_defines.h",
%{comment}"include/library_types.h",
%{comment}"include/math_constants.h",
%{comment}"include/math_functions.h",
%{comment}"include/mma.h",
%{comment}"include/nvfunctional",
%{comment}"include/sm_20_atomic_functions.h",
%{comment}"include/sm_20_atomic_functions.hpp",
%{comment}"include/sm_20_intrinsics.h",
%{comment}"include/sm_20_intrinsics.hpp",
%{comment}"include/sm_30_intrinsics.h",
%{comment}"include/sm_30_intrinsics.hpp",
%{comment}"include/sm_32_atomic_functions.h",
%{comment}"include/sm_32_atomic_functions.hpp",
%{comment}"include/sm_32_intrinsics.h",
%{comment}"include/sm_32_intrinsics.hpp",
%{comment}"include/sm_35_atomic_functions.h",
%{comment}"include/sm_35_intrinsics.h",
%{comment}"include/sm_60_atomic_functions.h",
%{comment}"include/sm_60_atomic_functions.hpp",
%{comment}"include/sm_61_intrinsics.h",
%{comment}"include/sm_61_intrinsics.hpp",
%{comment}"include/surface_functions.h",
%{comment}"include/surface_indirect_functions.h",
%{comment}"include/surface_types.h",
%{comment}"include/texture_fetch_functions.h",
%{comment}"include/texture_indirect_functions.h",
%{comment}"include/texture_types.h",
%{comment}"include/vector_functions.h",
%{comment}"include/vector_functions.hpp",
%{comment}"include/vector_types.h",
%{comment}]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,73 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cudnn_ops_infer",
hdrs = [":headers"],
shared_library = "lib/libcudnn_ops_infer.so.%{libcudnn_ops_infer_version}",
)
cc_import(
name = "cudnn_cnn_infer",
hdrs = [":headers"],
shared_library = "lib/libcudnn_cnn_infer.so.%{libcudnn_cnn_infer_version}",
)
cc_import(
name = "cudnn_ops_train",
hdrs = [":headers"],
shared_library = "lib/libcudnn_ops_train.so.%{libcudnn_ops_train_version}",
)
cc_import(
name = "cudnn_cnn_train",
hdrs = [":headers"],
shared_library = "lib/libcudnn_cnn_train.so.%{libcudnn_cnn_train_version}",
)
cc_import(
name = "cudnn_adv_infer",
hdrs = [":headers"],
shared_library = "lib/libcudnn_adv_infer.so.%{libcudnn_adv_infer_version}",
)
cc_import(
name = "cudnn_adv_train",
hdrs = [":headers"],
shared_library = "lib/libcudnn_adv_train.so.%{libcudnn_adv_train_version}",
)
cc_import(
name = "cudnn_main",
hdrs = [":headers"],
shared_library = "lib/libcudnn.so.%{libcudnn_version}",
)
%{multiline_comment}
cc_library(
name = "cudnn",
%{comment}deps = [
%{comment}":cudnn_ops_infer",
%{comment}":cudnn_ops_train",
%{comment}":cudnn_cnn_infer",
%{comment}":cudnn_cnn_train",
%{comment}":cudnn_adv_infer",
%{comment}":cudnn_adv_train",
%{comment}"@cuda_nvrtc//:nvrtc",
%{comment}":cudnn_main",
%{comment}],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/cudnn*.h",
%{comment}]),
include_prefix = "third_party/gpus/cudnn",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,80 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cudnn_ops",
hdrs = [":headers"],
shared_library = "lib/libcudnn_ops.so.%{libcudnn_ops_version}",
)
cc_import(
name = "cudnn_cnn",
hdrs = [":headers"],
shared_library = "lib/libcudnn_cnn.so.%{libcudnn_cnn_version}",
)
cc_import(
name = "cudnn_adv",
hdrs = [":headers"],
shared_library = "lib/libcudnn_adv.so.%{libcudnn_adv_version}",
)
cc_import(
name = "cudnn_graph",
hdrs = [":headers"],
shared_library = "lib/libcudnn_graph.so.%{libcudnn_graph_version}",
)
cc_import(
name = "cudnn_engines_precompiled",
hdrs = [":headers"],
shared_library = "lib/libcudnn_engines_precompiled.so.%{libcudnn_engines_precompiled_version}",
)
cc_import(
name = "cudnn_engines_runtime_compiled",
hdrs = [":headers"],
shared_library = "lib/libcudnn_engines_runtime_compiled.so.%{libcudnn_engines_runtime_compiled_version}",
)
cc_import(
name = "cudnn_heuristic",
hdrs = [":headers"],
shared_library = "lib/libcudnn_heuristic.so.%{libcudnn_heuristic_version}",
)
cc_import(
name = "cudnn_main",
hdrs = [":headers"],
shared_library = "lib/libcudnn.so.%{libcudnn_version}",
)
%{multiline_comment}
cc_library(
name = "cudnn",
%{comment}deps = [
%{comment}":cudnn_engines_precompiled",
%{comment}":cudnn_ops",
%{comment}":cudnn_graph",
%{comment}":cudnn_cnn",
%{comment}":cudnn_adv",
%{comment}":cudnn_engines_runtime_compiled",
%{comment}":cudnn_heuristic",
%{comment}"@cuda_nvrtc//:nvrtc",
%{comment}":cudnn_main",
%{comment}],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/cudnn*.h",
%{comment}]),
include_prefix = "third_party/gpus/cudnn",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,29 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cufft_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcufft.so.%{libcufft_version}",
)
%{multiline_comment}
cc_library(
name = "cufft",
%{comment}deps = [":cufft_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/cudalibxt.h",
%{comment}"include/cufft*.h"
%{comment}]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,59 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cupti_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcupti.so.%{libcupti_version}",
)
%{multiline_comment}
cc_library(
name = "cupti",
%{comment}deps = [":cupti_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/Openacc/**",
%{comment}"include/Openmp/**",
%{comment}"include/cuda_stdint.h",
%{comment}"include/cupti.h",
%{comment}"include/cupti_activity.h",
%{comment}"include/cupti_activity_deprecated.h",
%{comment}"include/cupti_callbacks.h",
%{comment}"include/cupti_checkpoint.h",
%{comment}"include/cupti_driver_cbid.h",
%{comment}"include/cupti_events.h",
%{comment}"include/cupti_metrics.h",
%{comment}"include/cupti_nvtx_cbid.h",
%{comment}"include/cupti_pcsampling.h",
%{comment}"include/cupti_pcsampling_util.h",
%{comment}"include/cupti_profiler_target.h",
%{comment}"include/cupti_result.h",
%{comment}"include/cupti_runtime_cbid.h",
%{comment}"include/cupti_sass_metrics.h",
%{comment}"include/cupti_target.h",
%{comment}"include/cupti_version.h",
%{comment}"include/generated_cudaGL_meta.h",
%{comment}"include/generated_cudaVDPAU_meta.h",
%{comment}"include/generated_cuda_gl_interop_meta.h",
%{comment}"include/generated_cuda_meta.h",
%{comment}"include/generated_cuda_runtime_api_meta.h",
%{comment}"include/generated_cuda_vdpau_interop_meta.h",
%{comment}"include/generated_cudart_removed_meta.h",
%{comment}"include/generated_nvtx_meta.h",
%{comment}"include/nvperf_common.h",
%{comment}"include/nvperf_cuda_host.h",
%{comment}"include/nvperf_host.h",
%{comment}"include/nvperf_target.h",
%{comment}]),
include_prefix = "third_party/gpus/cuda/extras/CUPTI/include",
includes = ["include/"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,26 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "curand_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcurand.so.%{libcurand_version}",
)
%{multiline_comment}
cc_library(
name = "curand",
%{comment}deps = [":curand_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob(["include/curand*.h"]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,34 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cusolver_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcusolver.so.%{libcusolver_version}",
deps = [
"@cuda_nvjitlink//:nvjitlink",
"@cuda_cusparse//:cusparse",
"@cuda_cublas//:cublas",
"@cuda_cublas//:cublasLt",
],
)
%{multiline_comment}
cc_library(
name = "cusolver",
%{comment}deps = [":cusolver_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/cusolver*.h",
%{comment}]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,27 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "cusparse_shared_library",
hdrs = [":headers"],
shared_library = "lib/libcusparse.so.%{libcusparse_version}",
deps = ["@cuda_nvjitlink//:nvjitlink"],
)
%{multiline_comment}
cc_library(
name = "cusparse",
%{comment}deps = [":cusparse_shared_library"],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = ["include/cusparse.h"],
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,125 @@
# Copyright 2024 The TensorFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hermetic CUDA redistributions JSON repository initialization. Consult the WORKSPACE on how to use it."""
load("//third_party:repo.bzl", "tf_mirror_urls")
load(
"//third_party/gpus/cuda/hermetic:cuda_redist_versions.bzl",
"CUDA_REDIST_JSON_DICT",
"CUDNN_REDIST_JSON_DICT",
)
def _get_env_var(ctx, name):
return ctx.os.environ.get(name)
def _get_json_file_content(repository_ctx, url_to_sha256, json_file_name):
if len(url_to_sha256) > 1:
(url, sha256) = url_to_sha256
else:
url = url_to_sha256[0]
sha256 = ""
repository_ctx.download(
url = tf_mirror_urls(url),
sha256 = sha256,
output = json_file_name,
)
return repository_ctx.read(repository_ctx.path(json_file_name))
def _cuda_redist_json_impl(repository_ctx):
cuda_version = (_get_env_var(repository_ctx, "HERMETIC_CUDA_VERSION") or
_get_env_var(repository_ctx, "TF_CUDA_VERSION"))
local_cuda_path = _get_env_var(repository_ctx, "LOCAL_CUDA_PATH")
cudnn_version = (_get_env_var(repository_ctx, "HERMETIC_CUDNN_VERSION") or
_get_env_var(repository_ctx, "TF_CUDNN_VERSION"))
local_cudnn_path = _get_env_var(repository_ctx, "LOCAL_CUDNN_PATH")
supported_cuda_versions = repository_ctx.attr.cuda_json_dict.keys()
if (cuda_version and not local_cuda_path and
(cuda_version not in supported_cuda_versions)):
fail(
("The supported CUDA versions are {supported_versions}." +
" Please provide a supported version in HERMETIC_CUDA_VERSION" +
" environment variable or add JSON URL for" +
" CUDA version={version}.")
.format(
supported_versions = supported_cuda_versions,
version = cuda_version,
),
)
supported_cudnn_versions = repository_ctx.attr.cudnn_json_dict.keys()
if cudnn_version and not local_cudnn_path and (cudnn_version not in supported_cudnn_versions):
fail(
("The supported CUDNN versions are {supported_versions}." +
" Please provide a supported version in HERMETIC_CUDNN_VERSION" +
" environment variable or add JSON URL for" +
" CUDNN version={version}.")
.format(
supported_versions = supported_cudnn_versions,
version = cudnn_version,
),
)
cuda_redistributions = "{}"
cudnn_redistributions = "{}"
if cuda_version and not local_cuda_path:
cuda_redistributions = _get_json_file_content(
repository_ctx,
repository_ctx.attr.cuda_json_dict[cuda_version],
"redistrib_cuda_%s.json" % cuda_version,
)
if cudnn_version and not local_cudnn_path:
cudnn_redistributions = _get_json_file_content(
repository_ctx,
repository_ctx.attr.cudnn_json_dict[cudnn_version],
"redistrib_cudnn_%s.json" % cudnn_version,
)
repository_ctx.file(
"distributions.bzl",
"""CUDA_REDISTRIBUTIONS = {cuda_redistributions}
CUDNN_REDISTRIBUTIONS = {cudnn_redistributions}
""".format(
cuda_redistributions = cuda_redistributions,
cudnn_redistributions = cudnn_redistributions,
),
)
repository_ctx.file(
"BUILD",
"",
)
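# The generated @cuda_redist_json//:distributions.bzl then looks roughly like
# this (keys and archive paths come from NVIDIA's redistrib_*.json; the entry
# below is illustrative, not exact):
#
#   CUDA_REDISTRIBUTIONS = {
#       "cuda_cudart": {
#           "linux-x86_64": {
#               "relative_path": "cuda_cudart/linux-x86_64/cuda_cudart-linux-x86_64-12.3.101-archive.tar.xz",
#               "sha256": "<archive sha256>",
#           },
#       },
#   }
#   CUDNN_REDISTRIBUTIONS = {
#       "cudnn": {...},
#   }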
cuda_redist_json = repository_rule(
implementation = _cuda_redist_json_impl,
attrs = {
"cuda_json_dict": attr.string_list_dict(mandatory = True),
"cudnn_json_dict": attr.string_list_dict(mandatory = True),
},
environ = [
"HERMETIC_CUDA_VERSION",
"HERMETIC_CUDNN_VERSION",
"TF_CUDA_VERSION",
"TF_CUDNN_VERSION",
"LOCAL_CUDA_PATH",
"LOCAL_CUDNN_PATH",
],
)
def cuda_json_init_repository(
cuda_json_dict = CUDA_REDIST_JSON_DICT,
cudnn_json_dict = CUDNN_REDIST_JSON_DICT):
cuda_redist_json(
name = "cuda_redist_json",
cuda_json_dict = cuda_json_dict,
cudnn_json_dict = cudnn_json_dict,
)


@ -0,0 +1,75 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"bin/nvcc",
])
filegroup(
name = "nvvm",
srcs = [
"nvvm/libdevice/libdevice.10.bc",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "nvlink",
srcs = [
"bin/nvlink",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "fatbinary",
srcs = [
"bin/fatbinary",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "bin2c",
srcs = [
"bin/bin2c",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "ptxas",
srcs = [
"bin/ptxas",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "bin",
srcs = glob([
"bin/**",
"nvvm/bin/**",
]),
visibility = ["//visibility:public"],
)
filegroup(
name = "link_stub",
srcs = [
"bin/crt/link.stub",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/crt/**",
%{comment}"include/fatbinary_section.h",
%{comment}"include/nvPTXCompiler.h",
%{comment}]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,17 @@
licenses(["restricted"]) # NVIDIA proprietary license
exports_files([
"version.txt",
])
%{multiline_comment}
cc_import(
name = "nvjitlink_shared_library",
shared_library = "lib/libnvJitLink.so.%{libnvjitlink_version}",
)
%{multiline_comment}
cc_library(
name = "nvjitlink",
%{comment}deps = [":nvjitlink_shared_library"],
visibility = ["//visibility:public"],
)


@ -0,0 +1,10 @@
licenses(["restricted"]) # NVIDIA proprietary license
cc_library(
name = "headers",
%{comment}hdrs = ["include/nvml.h"],
include_prefix = "third_party/gpus/cuda/nvml/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,9 @@
licenses(["restricted"]) # NVIDIA proprietary license
filegroup(
name = "nvprune",
srcs = [
"bin/nvprune",
],
visibility = ["//visibility:public"],
)


@ -0,0 +1,20 @@
licenses(["restricted"]) # NVIDIA proprietary license
%{multiline_comment}
cc_import(
name = "nvrtc_main",
shared_library = "lib/libnvrtc.so.%{libnvrtc_version}",
)
cc_import(
name = "nvrtc_builtins",
shared_library = "lib/libnvrtc-builtins.so.%{libnvrtc-builtins_version}",
)
%{multiline_comment}
cc_library(
name = "nvrtc",
%{comment}deps = [
%{comment}":nvrtc_main",
%{comment}":nvrtc_builtins",
%{comment}],
visibility = ["//visibility:public"],
)


@ -0,0 +1,13 @@
licenses(["restricted"]) # NVIDIA proprietary license
cc_library(
name = "headers",
%{comment}hdrs = glob([
%{comment}"include/nvToolsExt*.h",
%{comment}"include/nvtx3/**",
%{comment}]),
include_prefix = "third_party/gpus/cuda/include",
includes = ["include"],
strip_include_prefix = "include",
visibility = ["@local_config_cuda//cuda:__pkg__"],
)


@ -0,0 +1,491 @@
# Copyright 2024 The TensorFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hermetic CUDA repositories initialization. Consult the WORKSPACE on how to use it."""
load("//third_party:repo.bzl", "tf_mirror_urls")
load(
"//third_party/gpus/cuda/hermetic:cuda_redist_versions.bzl",
"CUDA_REDIST_PATH_PREFIX",
"CUDNN_REDIST_PATH_PREFIX",
"REDIST_VERSIONS_TO_BUILD_TEMPLATES",
)
OS_ARCH_DICT = {
"amd64": "x86_64-unknown-linux-gnu",
"aarch64": "aarch64-unknown-linux-gnu",
}
_REDIST_ARCH_DICT = {
"linux-x86_64": "x86_64-unknown-linux-gnu",
"linux-sbsa": "aarch64-unknown-linux-gnu",
}
SUPPORTED_ARCHIVE_EXTENSIONS = [
".zip",
".jar",
".war",
".aar",
".tar",
".tar.gz",
".tgz",
".tar.xz",
".txz",
".tar.zst",
".tzst",
".tar.bz2",
".tbz",
".ar",
".deb",
".whl",
]
def get_env_var(ctx, name):
return ctx.os.environ.get(name)
def _get_file_name(url):
last_slash_index = url.rfind("/")
return url[last_slash_index + 1:]
def get_archive_name(url):
# buildifier: disable=function-docstring-return
# buildifier: disable=function-docstring-args
"""Returns the archive name without extension."""
filename = _get_file_name(url)
for extension in SUPPORTED_ARCHIVE_EXTENSIONS:
if filename.endswith(extension):
return filename[:-len(extension)]
return filename
LIB_EXTENSION = ".so."
def _get_lib_name_and_version(path):
extension_index = path.rfind(LIB_EXTENSION)
last_slash_index = path.rfind("/")
lib_name = path[last_slash_index + 1:extension_index]
lib_version = path[extension_index + len(LIB_EXTENSION):]
return (lib_name, lib_version)
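# Worked examples for the two helpers above (inputs are illustrative):
#
#   get_archive_name("https://example.com/cuda_cudart-linux-x86_64-12.3.101-archive.tar.xz")
#       -> "cuda_cudart-linux-x86_64-12.3.101-archive"
#
#   _get_lib_name_and_version("lib/libcudart.so.12.3.2")
#       -> ("libcudart", "12.3.2")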
def _get_libraries_by_redist_name_in_dir(repository_ctx):
lib_dir_path = repository_ctx.path("lib")
if not lib_dir_path.exists:
return []
main_lib_name = "lib{}".format(repository_ctx.name.split("_")[1]).lower()
lib_dir_content = lib_dir_path.readdir()
return [
str(f)
for f in lib_dir_content
if (LIB_EXTENSION in str(f) and
main_lib_name in str(f).lower())
]
def get_lib_name_to_version_dict(repository_ctx):
# buildifier: disable=function-docstring-return
# buildifier: disable=function-docstring-args
"""Returns a dict of library names and major versions."""
lib_name_to_version_dict = {}
for path in _get_libraries_by_redist_name_in_dir(repository_ctx):
lib_name, lib_version = _get_lib_name_and_version(path)
key = "%%{%s_version}" % lib_name.lower()
# We need to find either major or major.minor version if there is no
# file with major version. E.g. if we have the following files:
# libcudart.so
# libcudart.so.12
# libcudart.so.12.3.2,
# we will save {"%{libcudart_version}": "12"}.
if len(lib_version.split(".")) == 1:
lib_name_to_version_dict[key] = lib_version
if (len(lib_version.split(".")) == 2 and
key not in lib_name_to_version_dict):
lib_name_to_version_dict[key] = lib_version
return lib_name_to_version_dict
def create_dummy_build_file(repository_ctx, use_comment_symbols = True):
repository_ctx.template(
"BUILD",
repository_ctx.attr.build_templates[0],
{
"%{multiline_comment}": "'''" if use_comment_symbols else "",
"%{comment}": "#" if use_comment_symbols else "",
},
)
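# For illustration: with use_comment_symbols = True every "%{comment}" in a
# BUILD template is replaced by "#" and every "%{multiline_comment}" by "'''",
# so e.g. the cublas template shown earlier renders roughly as
#
#   '''
#   cc_import(
#       name = "cublas_shared_library",
#       ...
#   )
#   '''
#   cc_library(
#       name = "cublas",
#       visibility = ["//visibility:public"],
#       #deps = [":cublas_shared_library"],
#   )
#
# leaving empty placeholder targets when no CUDA version is configured.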
def _get_build_template(repository_ctx, major_lib_version):
template = None
for i in range(0, len(repository_ctx.attr.versions)):
for dist_version in repository_ctx.attr.versions[i].split(","):
if dist_version == major_lib_version:
template = repository_ctx.attr.build_templates[i]
break
if not template:
fail("No build template found for {} version {}".format(
repository_ctx.name,
major_lib_version,
))
return template
def get_major_library_version(repository_ctx, lib_name_to_version_dict):
# buildifier: disable=function-docstring-return
# buildifier: disable=function-docstring-args
"""Returns the major library version provided the versions dict."""
major_version = ""
if len(lib_name_to_version_dict) == 0:
return major_version
main_lib_name = "lib{}".format(repository_ctx.name.split("_")[1])
key = "%%{%s_version}" % main_lib_name
major_version = lib_name_to_version_dict[key]
return major_version
def create_build_file(
repository_ctx,
lib_name_to_version_dict,
major_lib_version):
# buildifier: disable=function-docstring-args
"""Creates a BUILD file for the repository."""
if len(major_lib_version) == 0:
build_template_content = repository_ctx.read(
repository_ctx.attr.build_templates[0],
)
if "_version}" not in build_template_content:
create_dummy_build_file(repository_ctx, use_comment_symbols = False)
else:
create_dummy_build_file(repository_ctx)
return
build_template = _get_build_template(
repository_ctx,
major_lib_version.split(".")[0],
)
repository_ctx.template(
"BUILD",
build_template,
lib_name_to_version_dict | {
"%{multiline_comment}": "",
"%{comment}": "",
},
)
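# Conversely, when a major library version was detected (e.g. "12" for
# cublas), the comment markers are substituted with empty strings and the
# versioned library names are filled in, so the same template renders
# roughly as
#
#   cc_import(
#       name = "cublas_shared_library",
#       hdrs = [":headers"],
#       shared_library = "lib/libcublas.so.12",
#       deps = [":cublasLt"],
#   )
#   cc_library(
#       name = "cublas",
#       visibility = ["//visibility:public"],
#       deps = [":cublas_shared_library"],
#   )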
def _create_symlinks(repository_ctx, local_path, dirs):
for dir in dirs:
repository_ctx.symlink(
"{path}/{dir}".format(
path = local_path,
dir = dir,
),
dir,
)
def use_local_path(repository_ctx, local_path, dirs):
# buildifier: disable=function-docstring-args
"""Creates repository using local redistribution paths."""
_create_symlinks(
repository_ctx,
local_path,
dirs,
)
lib_name_to_version_dict = get_lib_name_to_version_dict(repository_ctx)
major_version = get_major_library_version(
repository_ctx,
lib_name_to_version_dict,
)
create_build_file(
repository_ctx,
lib_name_to_version_dict,
major_version,
)
repository_ctx.file("version.txt", major_version)
def _use_local_cuda_path(repository_ctx, local_cuda_path):
# buildifier: disable=function-docstring-args
""" Creates symlinks and initializes hermetic CUDA repository."""
use_local_path(
repository_ctx,
local_cuda_path,
["include", "lib", "bin", "nvvm"],
)
def _use_local_cudnn_path(repository_ctx, local_cudnn_path):
# buildifier: disable=function-docstring-args
""" Creates symlinks and initializes hermetic CUDNN repository."""
use_local_path(repository_ctx, local_cudnn_path, ["include", "lib"])
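# A sketch of the local-path escape hatch (path below is hypothetical):
# building with --repo_env=LOCAL_CUDA_PATH="/opt/cuda-12.3" skips the
# download and instead symlinks these directories into the repository:
#
#   /opt/cuda-12.3/include
#   /opt/cuda-12.3/lib
#   /opt/cuda-12.3/bin
#   /opt/cuda-12.3/nvvm
#
# LOCAL_CUDNN_PATH works the same way and only needs include/ and lib/.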
def _download_redistribution(repository_ctx, arch_key, path_prefix):
(url, sha256) = repository_ctx.attr.url_dict[arch_key]
# Prepend the redist path prefix only to relative URLs; absolute
# "http(s)" and "file:///" URLs are used as-is.
if not (url.startswith("http") or url.startswith("file:///")):
url = path_prefix + url
archive_name = get_archive_name(url)
file_name = _get_file_name(url)
print("Downloading and extracting {}".format(url)) # buildifier: disable=print
repository_ctx.download(
url = tf_mirror_urls(url),
output = file_name,
sha256 = sha256,
)
if repository_ctx.attr.override_strip_prefix:
strip_prefix = repository_ctx.attr.override_strip_prefix
else:
strip_prefix = archive_name
repository_ctx.extract(
archive = file_name,
stripPrefix = strip_prefix,
)
repository_ctx.delete(file_name)
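# Sketch of the URL resolution above (archive name is illustrative): a
# relative path from the redistrib JSON such as
#   "libcublas/linux-x86_64/libcublas-linux-x86_64-12.3.4.1-archive.tar.xz"
# is resolved against the redist path prefix, yielding
#   "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/linux-x86_64/libcublas-linux-x86_64-12.3.4.1-archive.tar.xz"
# while absolute "http(s)://..." and "file:///..." URLs are used verbatim.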
def _use_downloaded_cuda_redistribution(repository_ctx):
# buildifier: disable=function-docstring-args
""" Downloads CUDA redistribution and initializes hermetic CUDA repository."""
major_version = ""
cuda_version = (get_env_var(repository_ctx, "HERMETIC_CUDA_VERSION") or
get_env_var(repository_ctx, "TF_CUDA_VERSION"))
if not cuda_version:
# If no CUDA version is found, comment out all cc_import targets.
create_dummy_build_file(repository_ctx)
repository_ctx.file("version.txt", major_version)
return
if len(repository_ctx.attr.url_dict) == 0:
print("{} is not found in redistributions list.".format(
repository_ctx.name,
)) # buildifier: disable=print
create_dummy_build_file(repository_ctx)
repository_ctx.file("version.txt", major_version)
return
# Download archive only when GPU config is used.
arch_key = OS_ARCH_DICT[repository_ctx.os.arch]
if arch_key not in repository_ctx.attr.url_dict.keys():
fail(
("The supported platforms are {supported_platforms}." +
" Platform {platform} is not supported for {dist_name}.")
.format(
supported_platforms = repository_ctx.attr.url_dict.keys(),
platform = arch_key,
dist_name = repository_ctx.name,
),
)
_download_redistribution(
repository_ctx,
arch_key,
repository_ctx.attr.cuda_redist_path_prefix,
)
lib_name_to_version_dict = get_lib_name_to_version_dict(repository_ctx)
major_version = get_major_library_version(repository_ctx, lib_name_to_version_dict)
create_build_file(
repository_ctx,
lib_name_to_version_dict,
major_version,
)
repository_ctx.file("version.txt", major_version)
def _cuda_repo_impl(repository_ctx):
local_cuda_path = get_env_var(repository_ctx, "LOCAL_CUDA_PATH")
if local_cuda_path:
_use_local_cuda_path(repository_ctx, local_cuda_path)
else:
_use_downloaded_cuda_redistribution(repository_ctx)
cuda_repo = repository_rule(
implementation = _cuda_repo_impl,
attrs = {
"url_dict": attr.string_list_dict(mandatory = True),
"versions": attr.string_list(mandatory = True),
"build_templates": attr.label_list(mandatory = True),
"override_strip_prefix": attr.string(),
"cuda_redist_path_prefix": attr.string(),
},
environ = [
"HERMETIC_CUDA_VERSION",
"TF_CUDA_VERSION",
"LOCAL_CUDA_PATH",
],
)
def _use_downloaded_cudnn_redistribution(repository_ctx):
# buildifier: disable=function-docstring-args
""" Downloads CUDNN redistribution and initializes hermetic CUDNN repository."""
cudnn_version = None
major_version = ""
cudnn_version = (get_env_var(repository_ctx, "HERMETIC_CUDNN_VERSION") or
get_env_var(repository_ctx, "TF_CUDNN_VERSION"))
cuda_version = (get_env_var(repository_ctx, "HERMETIC_CUDA_VERSION") or
get_env_var(repository_ctx, "TF_CUDA_VERSION"))
if not cudnn_version:
# If no CUDNN version is found, comment out cc_import targets.
create_dummy_build_file(repository_ctx)
repository_ctx.file("version.txt", major_version)
return
if len(repository_ctx.attr.url_dict) == 0:
print("{} is not found in redistributions list.".format(
repository_ctx.name,
)) # buildifier: disable=print
create_dummy_build_file(repository_ctx)
repository_ctx.file("version.txt", major_version)
return
# Download archive only when GPU config is used.
arch_key = OS_ARCH_DICT[repository_ctx.os.arch]
if arch_key not in repository_ctx.attr.url_dict.keys():
arch_key = "cuda{version}_{arch}".format(
version = cuda_version.split(".")[0],
arch = arch_key,
)
if arch_key not in repository_ctx.attr.url_dict.keys():
fail(
("The supported platforms are {supported_platforms}." +
" Platform {platform} is not supported for {dist_name}.")
.format(
supported_platforms = repository_ctx.attr.url_dict.keys(),
platform = arch_key,
dist_name = repository_ctx.name,
),
)
_download_redistribution(
repository_ctx,
arch_key,
repository_ctx.attr.cudnn_redist_path_prefix,
)
lib_name_to_version_dict = get_lib_name_to_version_dict(repository_ctx)
major_version = get_major_library_version(
repository_ctx,
lib_name_to_version_dict,
)
create_build_file(
repository_ctx,
lib_name_to_version_dict,
major_version,
)
repository_ctx.file("version.txt", major_version)
def _cudnn_repo_impl(repository_ctx):
local_cudnn_path = get_env_var(repository_ctx, "LOCAL_CUDNN_PATH")
if local_cudnn_path:
_use_local_cudnn_path(repository_ctx, local_cudnn_path)
else:
_use_downloaded_cudnn_redistribution(repository_ctx)
cudnn_repo = repository_rule(
implementation = _cudnn_repo_impl,
attrs = {
"url_dict": attr.string_list_dict(mandatory = True),
"versions": attr.string_list(mandatory = True),
"build_templates": attr.label_list(mandatory = True),
"override_strip_prefix": attr.string(),
"cudnn_redist_path_prefix": attr.string(),
},
environ = [
"HERMETIC_CUDNN_VERSION",
"TF_CUDNN_VERSION",
"HERMETIC_CUDA_VERSION",
"TF_CUDA_VERSION",
"LOCAL_CUDNN_PATH",
],
)
def _get_redistribution_urls(dist_info):
url_dict = {}
for arch in _REDIST_ARCH_DICT.keys():
if "relative_path" in dist_info[arch]:
url_dict[_REDIST_ARCH_DICT[arch]] = [
dist_info[arch]["relative_path"],
dist_info[arch].get("sha256", ""),
]
continue
if "full_path" in dist_info[arch]:
url_dict[_REDIST_ARCH_DICT[arch]] = [
dist_info[arch]["full_path"],
dist_info[arch].get("sha256", ""),
]
continue
for cuda_version, data in dist_info[arch].items():
# CUDNN JSON might contain paths for each CUDA version.
path_key = "relative_path"
if path_key not in data.keys():
path_key = "full_path"
url_dict["{cuda_version}_{arch}".format(
cuda_version = cuda_version,
arch = _REDIST_ARCH_DICT[arch],
)] = [data[path_key], data.get("sha256", "")]
return url_dict
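# Sketch of the mapping (values illustrative): a redistrib JSON entry
#
#   "libcublas": {
#       "linux-x86_64": {"relative_path": "libcublas/linux-x86_64/a.tar.xz", "sha256": "abc"},
#       "linux-sbsa": {"relative_path": "libcublas/linux-sbsa/b.tar.xz", "sha256": "def"},
#   }
#
# becomes
#
#   {
#       "x86_64-unknown-linux-gnu": ["libcublas/linux-x86_64/a.tar.xz", "abc"],
#       "aarch64-unknown-linux-gnu": ["libcublas/linux-sbsa/b.tar.xz", "def"],
#   }
#
# CUDNN entries that are keyed by CUDA version instead produce keys such as
# "cuda12_x86_64-unknown-linux-gnu".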
def get_version_and_template_lists(version_to_template):
# buildifier: disable=function-docstring-return
# buildifier: disable=function-docstring-args
"""Returns lists of versions and templates provided in the dict."""
template_to_version_map = {}
for version, template in version_to_template.items():
if template not in template_to_version_map.keys():
template_to_version_map[template] = [version]
else:
template_to_version_map[template].append(version)
version_list = []
template_list = []
for template, versions in template_to_version_map.items():
version_list.append(",".join(versions))
template_list.append(Label(template))
return (version_list, template_list)
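# Example: the cublas entry of REDIST_VERSIONS_TO_BUILD_TEMPLATES (defined in
# cuda_redist_versions.bzl) maps both majors to one template,
#
#   {"12": "//third_party/gpus/cuda/hermetic:cuda_cublas.BUILD.tpl",
#    "11": "//third_party/gpus/cuda/hermetic:cuda_cublas.BUILD.tpl"}
#
# so this helper returns
#
#   (["12,11"], [Label("//third_party/gpus/cuda/hermetic:cuda_cublas.BUILD.tpl")])
#
# and _get_build_template later matches a detected major version by splitting
# "12,11" on commas.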
def cudnn_redist_init_repository(
cudnn_redistributions,
cudnn_redist_path_prefix = CUDNN_REDIST_PATH_PREFIX,
redist_versions_to_build_templates = REDIST_VERSIONS_TO_BUILD_TEMPLATES):
# buildifier: disable=function-docstring-args
"""Initializes CUDNN repository."""
if "cudnn" in cudnn_redistributions.keys():
url_dict = _get_redistribution_urls(cudnn_redistributions["cudnn"])
else:
url_dict = {}
repo_data = redist_versions_to_build_templates["cudnn"]
versions, templates = get_version_and_template_lists(
repo_data["version_to_template"],
)
cudnn_repo(
name = repo_data["repo_name"],
versions = versions,
build_templates = templates,
url_dict = url_dict,
cudnn_redist_path_prefix = cudnn_redist_path_prefix,
)
def cuda_redist_init_repositories(
cuda_redistributions,
cuda_redist_path_prefix = CUDA_REDIST_PATH_PREFIX,
redist_versions_to_build_templates = REDIST_VERSIONS_TO_BUILD_TEMPLATES):
# buildifier: disable=function-docstring-args
"""Initializes CUDA repositories."""
for redist_name, _ in redist_versions_to_build_templates.items():
if redist_name in ["cudnn", "cuda_nccl"]:
continue
if redist_name in cuda_redistributions.keys():
url_dict = _get_redistribution_urls(cuda_redistributions[redist_name])
else:
url_dict = {}
repo_data = redist_versions_to_build_templates[redist_name]
versions, templates = get_version_and_template_lists(
repo_data["version_to_template"],
)
cuda_repo(
name = repo_data["repo_name"],
versions = versions,
build_templates = templates,
url_dict = url_dict,
cuda_redist_path_prefix = cuda_redist_path_prefix,
)


@ -0,0 +1,197 @@
# Copyright 2024 The TensorFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hermetic CUDA redistribution versions."""
CUDA_REDIST_PATH_PREFIX = "https://developer.download.nvidia.com/compute/cuda/redist/"
CUDNN_REDIST_PATH_PREFIX = "https://developer.download.nvidia.com/compute/cudnn/redist/"
CUDA_REDIST_JSON_DICT = {
"11.8": [
"https://developer.download.nvidia.com/compute/cuda/redist/redistrib_11.8.0.json",
"941a950a4ab3b95311c50df7b3c8bca973e0cdda76fc2f4b456d2d5e4dac0281",
],
"12.1.1": [
"https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.1.1.json",
"bafea3cb83a4cf5c764eeedcaac0040d0d3c5db3f9a74550da0e7b6ac24d378c",
],
"12.3.1": [
"https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.3.1.json",
"b3cc4181d711cf9b6e3718f323b23813c24f9478119911d7b4bceec9b437dbc3",
],
"12.3.2": [
"https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.3.2.json",
"1b6eacf335dd49803633fed53ef261d62c193e5a56eee5019e7d2f634e39e7ef",
],
}
CUDNN_REDIST_JSON_DICT = {
"8.6": [
"https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_8.6.0.json",
"7f6f50bed4fd8216dc10d6ef505771dc0ecc99cce813993ab405cb507a21d51d",
],
"8.9.6": [
"https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_8.9.6.json",
"6069ef92a2b9bb18cebfbc944964bd2b024b76f2c2c35a43812982e0bc45cf0c",
],
"8.9.7.29": [
"https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_8.9.7.29.json",
"a0734f26f068522464fa09b2f2c186dfbe6ad7407a88ea0c50dd331f0c3389ec",
],
"9.1.1": [
"https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_9.1.1.json",
"d22d569405e5683ff8e563d00d6e8c27e5e6a902c564c23d752b22a8b8b3fe20",
],
}
# The versions are different for x86 and aarch64 architectures because only
# NCCL release versions 2.20.3 and 2.20.5 have the wheels for aarch64.
CUDA_12_NCCL_WHEEL_DICT = {
"x86_64-unknown-linux-gnu": {
"version": "2.21.5",
"url": "https://files.pythonhosted.org/packages/df/99/12cd266d6233f47d00daf3a72739872bdc10267d0383508b0b9c84a18bb6/nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl",
"sha256": "8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0",
},
"aarch64-unknown-linux-gnu": {
"version": "2.20.5",
"url": "https://files.pythonhosted.org/packages/c1/bb/d09dda47c881f9ff504afd6f9ca4f502ded6d8fc2f572cacc5e39da91c28/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl",
"sha256": "1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01",
},
}
CUDA_11_NCCL_WHEEL_DICT = {
"x86_64-unknown-linux-gnu": {
"version": "2.21.5",
"url": "https://files.pythonhosted.org/packages/ac/9a/8b6a28b3b87d5fddab0e92cd835339eb8fbddaa71ae67518c8c1b3d05bae/nvidia_nccl_cu11-2.21.5-py3-none-manylinux2014_x86_64.whl",
"sha256": "49d8350629c7888701d1fd200934942671cb5c728f49acc5a0b3a768820bed29",
},
}
CUDA_NCCL_WHEELS = {
"11.8": CUDA_11_NCCL_WHEEL_DICT,
"12.1.1": CUDA_12_NCCL_WHEEL_DICT,
"12.3.1": CUDA_12_NCCL_WHEEL_DICT,
"12.3.2": CUDA_12_NCCL_WHEEL_DICT,
}
REDIST_VERSIONS_TO_BUILD_TEMPLATES = {
"cuda_nccl": {
"repo_name": "cuda_nccl",
"version_to_template": {
"2": "//third_party/nccl/hermetic:cuda_nccl.BUILD.tpl",
},
},
"cudnn": {
"repo_name": "cuda_cudnn",
"version_to_template": {
"9": "//third_party/gpus/cuda/hermetic:cuda_cudnn9.BUILD.tpl",
"8": "//third_party/gpus/cuda/hermetic:cuda_cudnn.BUILD.tpl",
},
},
"libcublas": {
"repo_name": "cuda_cublas",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_cublas.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_cublas.BUILD.tpl",
},
},
"cuda_cudart": {
"repo_name": "cuda_cudart",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_cudart.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_cudart.BUILD.tpl",
},
},
"libcufft": {
"repo_name": "cuda_cufft",
"version_to_template": {
"11": "//third_party/gpus/cuda/hermetic:cuda_cufft.BUILD.tpl",
"10": "//third_party/gpus/cuda/hermetic:cuda_cufft.BUILD.tpl",
},
},
"cuda_cupti": {
"repo_name": "cuda_cupti",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_cupti.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_cupti.BUILD.tpl",
},
},
"libcurand": {
"repo_name": "cuda_curand",
"version_to_template": {
"10": "//third_party/gpus/cuda/hermetic:cuda_curand.BUILD.tpl",
},
},
"libcusolver": {
"repo_name": "cuda_cusolver",
"version_to_template": {
"11": "//third_party/gpus/cuda/hermetic:cuda_cusolver.BUILD.tpl",
},
},
"libcusparse": {
"repo_name": "cuda_cusparse",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_cusparse.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_cusparse.BUILD.tpl",
},
},
"libnvjitlink": {
"repo_name": "cuda_nvjitlink",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvjitlink.BUILD.tpl",
},
},
"cuda_nvrtc": {
"repo_name": "cuda_nvrtc",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvrtc.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_nvrtc.BUILD.tpl",
},
},
"cuda_cccl": {
"repo_name": "cuda_cccl",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_cccl.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_cccl.BUILD.tpl",
},
},
"cuda_nvcc": {
"repo_name": "cuda_nvcc",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvcc.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_nvcc.BUILD.tpl",
},
},
"cuda_nvml_dev": {
"repo_name": "cuda_nvml",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvml.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_nvml.BUILD.tpl",
},
},
"cuda_nvprune": {
"repo_name": "cuda_nvprune",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvprune.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_nvprune.BUILD.tpl",
},
},
"cuda_nvtx": {
"repo_name": "cuda_nvtx",
"version_to_template": {
"12": "//third_party/gpus/cuda/hermetic:cuda_nvtx.BUILD.tpl",
"11": "//third_party/gpus/cuda/hermetic:cuda_nvtx.BUILD.tpl",
},
},
}


@ -1,5 +1,7 @@
"""Repository rule for CUDA autoconfiguration. """Repository rule for CUDA autoconfiguration.
NB: DEPRECATED! Use `hermetic/cuda_configure` rule instead.
`cuda_configure` depends on the following environment variables:
* `TF_NEED_CUDA`: Whether to enable building with CUDA.
@ -53,6 +55,11 @@ load(
"realpath", "realpath",
"which", "which",
) )
load(
":compiler_common_tools.bzl",
"get_cxx_inc_directories",
"to_list_of_strings",
)
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
_GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX"
@ -67,20 +74,6 @@ _TF_CUDA_CONFIG_REPO = "TF_CUDA_CONFIG_REPO"
_TF_DOWNLOAD_CLANG = "TF_DOWNLOAD_CLANG"
_PYTHON_BIN_PATH = "PYTHON_BIN_PATH"
def to_list_of_strings(elements):
"""Convert the list of ["a", "b", "c"] into '"a", "b", "c"'.
This is to be used to put a list of strings into the bzl file templates
so it gets interpreted as list of strings in Starlark.
Args:
elements: list of string elements
Returns:
single string of elements wrapped in quotes separated by a comma."""
quoted_strings = ["\"" + element + "\"" for element in elements]
return ", ".join(quoted_strings)
def verify_build_defines(params):
"""Verify all variables that crosstool/BUILD.tpl expects are substituted.
@ -238,156 +231,6 @@ def find_cc(repository_ctx, use_cuda_clang):
" environment variable").format(target_cc_name, cc_path_envvar)) " environment variable").format(target_cc_name, cc_path_envvar))
return cc return cc
_INC_DIR_MARKER_BEGIN = "#include <...>"
# OSX add " (framework directory)" at the end of line, strip it.
_OSX_FRAMEWORK_SUFFIX = " (framework directory)"
_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)
def _cxx_inc_convert(path):
"""Convert path returned by cc -E xc++ in a complete path."""
path = path.strip()
if path.endswith(_OSX_FRAMEWORK_SUFFIX):
path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
return path
def _normalize_include_path(repository_ctx, path):
"""Normalizes include paths before writing them to the crosstool.
If path points inside the 'crosstool' folder of the repository, a relative
path is returned.
If path points outside the 'crosstool' folder, an absolute path is returned.
"""
path = str(repository_ctx.path(path))
crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
if path.startswith(crosstool_folder):
# We drop the path to "$REPO/crosstool" and a trailing path separator.
return path[len(crosstool_folder) + 1:]
return path
def _is_compiler_option_supported(repository_ctx, cc, option):
"""Checks that `option` is supported by the C compiler. Doesn't %-escape the option."""
result = repository_ctx.execute([
cc,
option,
"-o",
"/dev/null",
"-c",
str(repository_ctx.path("tools/cpp/empty.cc")),
])
return result.stderr.find(option) == -1
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot):
"""Compute the list of default C or C++ include directories."""
if lang_is_cpp:
lang = "c++"
else:
lang = "c"
sysroot = []
if tf_sysroot:
sysroot += ["--sysroot", tf_sysroot]
result = raw_exec(repository_ctx, [cc, "-E", "-x" + lang, "-", "-v"] +
sysroot)
stderr = err_out(result)
index1 = stderr.find(_INC_DIR_MARKER_BEGIN)
if index1 == -1:
return []
index1 = stderr.find("\n", index1)
if index1 == -1:
return []
index2 = stderr.rfind("\n ")
if index2 == -1 or index2 < index1:
return []
index2 = stderr.find("\n", index2 + 1)
if index2 == -1:
inc_dirs = stderr[index1 + 1:]
else:
inc_dirs = stderr[index1 + 1:index2].strip()
print_resource_dir_supported = _is_compiler_option_supported(
repository_ctx,
cc,
"-print-resource-dir",
)
if print_resource_dir_supported:
resource_dir = repository_ctx.execute(
[cc, "-print-resource-dir"],
).stdout.strip() + "/share"
inc_dirs += "\n" + resource_dir
compiler_includes = [
_normalize_include_path(repository_ctx, _cxx_inc_convert(p))
for p in inc_dirs.split("\n")
]
# The compiler might be on a symlink, e.g. /symlink -> /opt/gcc
# The above keeps only the resolved paths to the default includes (e.g. /opt/gcc/include/c++/11)
# but Bazel might encounter either (usually reported by the compiler)
# especially when a compiler wrapper (e.g. ccache) is used.
# So we need to also include paths where symlinks are not resolved.
# Try to find real path to CC installation to "see through" compiler wrappers
# GCC has the path to g++
index1 = result.stderr.find("COLLECT_GCC=")
if index1 != -1:
index1 = result.stderr.find("=", index1)
index2 = result.stderr.find("\n", index1)
cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname.dirname
else:
# Clang has the directory
index1 = result.stderr.find("InstalledDir: ")
if index1 != -1:
index1 = result.stderr.find(" ", index1)
index2 = result.stderr.find("\n", index1)
cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname
else:
# Fallback to the CC path
cc_topdir = repository_ctx.path(cc).dirname.dirname
# We now have the compiler installation prefix, e.g. /symlink/gcc
# And the resolved installation prefix, e.g. /opt/gcc
cc_topdir_resolved = str(realpath(repository_ctx, cc_topdir)).strip()
cc_topdir = str(cc_topdir).strip()
# If there is (any!) symlink involved we add paths where the unresolved installation prefix is kept.
# e.g. [/opt/gcc/include/c++/11, /opt/gcc/lib/gcc/x86_64-linux-gnu/11/include, /other/path]
# adds [/symlink/include/c++/11, /symlink/lib/gcc/x86_64-linux-gnu/11/include]
if cc_topdir_resolved != cc_topdir:
unresolved_compiler_includes = [
cc_topdir + inc[len(cc_topdir_resolved):]
for inc in compiler_includes
if inc.startswith(cc_topdir_resolved)
]
compiler_includes = compiler_includes + unresolved_compiler_includes
return compiler_includes
def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot):
"""Compute the list of default C and C++ include directories."""
# For some reason `clang -xc` sometimes returns include paths that are
# different from the ones from `clang -xc++`. (Symlink and a dir)
# So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
includes_cpp = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
True,
tf_sysroot,
)
includes_c = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
False,
tf_sysroot,
)
return includes_cpp + [
inc
for inc in includes_c
if inc not in includes_cpp
]
def auto_configure_fail(msg):
"""Output failure message when cuda configuration fails."""
red = "\033[0;31m"
@ -1293,6 +1136,7 @@ def _create_local_cuda_repository(repository_ctx):
cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "" cuda_defines["%{extra_no_canonical_prefixes_flags}"] = ""
cuda_defines["%{unfiltered_compile_flags}"] = "" cuda_defines["%{unfiltered_compile_flags}"] = ""
cuda_defines["%{cuda_nvcc_files}"] = "[]"
if is_cuda_clang and not is_nvcc_and_clang:
cuda_defines["%{host_compiler_path}"] = str(cc)
cuda_defines["%{host_compiler_warnings}"] = """
