[Bazel] Add CUDA build to CI (#66241)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/35316
On master, the Bazel CUDA build is disabled because there is no proper `cu_library` rule. This PR:
- Adds `rules_cuda` to the WORKSPACE and forwards `cu_library` to `rules_cuda` (a minimal usage sketch follows this list).
- Uses simple local CUDA and cuDNN repositories (adapted from TRTorch) for CUDA 11.3.
- Fixes the currently broken CUDA build.
- Enables the CUDA build in CI, not only for the `:torch` target but for all the test binaries, to catch undefined symbols.
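
For orientation, here is a minimal sketch of how a target can use the forwarded `cu_library` rule together with `if_cuda`. The target and source names below are hypothetical and not part of this PR; the load paths and `@cuda//:cuda_headers` do appear in the diff.

    load("//tools/rules:cu.bzl", "cu_library")
    load("//tools/config:defs.bzl", "if_cuda")

    # Hypothetical CUDA kernel library, compiled with nvcc via rules_cuda.
    cu_library(
        name = "my_kernels_cuda",
        srcs = ["my_kernels.cu"],          # placeholder source
        deps = ["@cuda//:cuda_headers"],   # headers from the local CUDA repository
    )

    # CPU-side library that links the CUDA kernels only when building with --config=gpu.
    cc_library(
        name = "my_ops",
        srcs = ["my_ops.cpp"],
        deps = if_cuda([":my_kernels_cuda"]),
    )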

Pull Request resolved: https://github.com/pytorch/pytorch/pull/66241

Reviewed By: ejguan

Differential Revision: D31544091

Pulled By: malfet

fbshipit-source-id: fd3c34d0e8f80fee06f015694a4c13a8e9e12206
Authored by Thuyen Ngo on 2021-12-17 13:41:24 -08:00; committed by Facebook GitHub Bot
parent e0f4e28c69
commit e35bf56461
17 changed files with 767 additions and 828 deletions


@ -1,6 +1,7 @@
build --copt=--std=c++14
build --copt=-I.
build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
build --experimental_ui_max_stdouterr_bytes=2048576
# Configuration to disable tty features for environments like CI
build:no-tty --curses no
@ -11,3 +12,8 @@ build:no-tty --show_progress_rate_limit 10
build:gpu --define=cuda=true
# define a separate build folder for faster switching between configs
build:gpu --platform_suffix=-gpu
# rules_cuda configuration
build:gpu --@rules_cuda//cuda:enable_cuda
build:gpu --@rules_cuda//cuda:cuda_targets=sm_52
build:gpu --@rules_cuda//cuda:compiler=nvcc
build:gpu --repo_env=CUDA_PATH=/usr/local/cuda
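
The `gpu` config now drives two switches: the pre-existing `--define=cuda=true` is the kind of flag that the `if_cuda` helper used throughout the BUILD files can key on, while the `@rules_cuda//cuda:*` flags plus `CUDA_PATH` tell `rules_cuda` to compile `cu_library` targets with nvcc for `sm_52` against the locally installed toolkit. For the first mechanism only, a define-based switch typically looks like the sketch below; the `config_setting` name and its location are assumptions for illustration, not the actual contents of `tools/config`.

    # In a BUILD file: true when --define=cuda=true is passed (hypothetical name).
    config_setting(
        name = "cuda_enabled",
        define_values = {"cuda": "true"},
    )

    # In a .bzl file: pick the CUDA-only items when the setting above is active.
    def if_cuda(if_true, if_false = []):
        return select({
            ":cuda_enabled": if_true,   # hypothetical label
            "//conditions:default": if_false,
        })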


@ -20,13 +20,13 @@
"linux-docs-push",
"linux-vulkan-bionic-py3.6-clang9",
"linux-xenial-cuda11.3-py3.6-gcc7",
"linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
"linux-xenial-py3-clang5-mobile-build",
"linux-xenial-py3-clang5-mobile-custom-build-static",
"linux-xenial-py3.6-clang7-asan",
"linux-xenial-py3.6-clang7-onnx",
"linux-xenial-py3.6-gcc5.4",
"linux-xenial-py3.6-gcc7",
"linux-xenial-py3.6-gcc7-bazel-test",
"macos-10-15-py3-arm64",
"macos-10-15-py3-lite-interpreter-x86-64",
"macos-11-py3-x86-64",
@ -48,7 +48,7 @@
"pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit"
],
"ciflow/bazel": [
"linux-xenial-py3.6-gcc7-bazel-test"
"linux-xenial-cuda11.3-py3.6-gcc7-bazel-test"
],
"ciflow/cpu": [
"caffe2-linux-xenial-py3.6-gcc5.4",
@ -56,11 +56,11 @@
"linux-docs",
"linux-docs-push",
"linux-vulkan-bionic-py3.6-clang9",
"linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
"linux-xenial-py3.6-clang7-asan",
"linux-xenial-py3.6-clang7-onnx",
"linux-xenial-py3.6-gcc5.4",
"linux-xenial-py3.6-gcc7",
"linux-xenial-py3.6-gcc7-bazel-test",
"parallelnative-linux-xenial-py3.6-gcc5.4",
"pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
"pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
@ -85,13 +85,13 @@
"linux-docs",
"linux-vulkan-bionic-py3.6-clang9",
"linux-xenial-cuda11.3-py3.6-gcc7",
"linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
"linux-xenial-py3-clang5-mobile-build",
"linux-xenial-py3-clang5-mobile-custom-build-static",
"linux-xenial-py3.6-clang7-asan",
"linux-xenial-py3.6-clang7-onnx",
"linux-xenial-py3.6-gcc5.4",
"linux-xenial-py3.6-gcc7",
"linux-xenial-py3.6-gcc7-bazel-test",
"pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
"pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
"win-vs2019-cpu-py3",
@ -126,13 +126,13 @@
"linux-docs-push",
"linux-vulkan-bionic-py3.6-clang9",
"linux-xenial-cuda11.3-py3.6-gcc7",
"linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
"linux-xenial-py3-clang5-mobile-build",
"linux-xenial-py3-clang5-mobile-custom-build-static",
"linux-xenial-py3.6-clang7-asan",
"linux-xenial-py3.6-clang7-onnx",
"linux-xenial-py3.6-gcc5.4",
"linux-xenial-py3.6-gcc7",
"linux-xenial-py3.6-gcc7-bazel-test",
"parallelnative-linux-xenial-py3.6-gcc5.4",
"periodic-libtorch-linux-bionic-cuda11.5-py3.6-gcc7",
"periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
@ -203,13 +203,13 @@
"linux-docs",
"linux-vulkan-bionic-py3.6-clang9",
"linux-xenial-cuda11.3-py3.6-gcc7",
"linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
"linux-xenial-py3-clang5-mobile-build",
"linux-xenial-py3-clang5-mobile-custom-build-static",
"linux-xenial-py3.6-clang7-asan",
"linux-xenial-py3.6-clang7-onnx",
"linux-xenial-py3.6-gcc5.4",
"linux-xenial-py3.6-gcc7",
"linux-xenial-py3.6-gcc7-bazel-test",
"macos-10-15-py3-arm64",
"macos-10-15-py3-lite-interpreter-x86-64",
"macos-11-py3-x86-64",


@ -609,8 +609,8 @@ ANDROID_WORKFLOWS = [
BAZEL_WORKFLOWS = [
CIWorkflow(
arch="linux",
build_environment="linux-xenial-py3.6-gcc7-bazel-test",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
build_environment="linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
test_runner_type=LINUX_CPU_TEST_RUNNER,
ciflow_config=CIFlowConfig(
labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_BAZEL, LABEL_CIFLOW_CPU, LABEL_CIFLOW_LINUX},


@ -0,0 +1,331 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/bazel_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-xenial-cuda11.3-py3.6-gcc7-bazel-test
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
- fbsync
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-xenial-cuda11.3-py3.6-gcc7-bazel-test
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
IS_GHA: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_operator, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AWS_DEFAULT_REGION: us-east-1
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
PYTORCH_RETRY_TEST_CASES: 1
concurrency:
group: linux-xenial-cuda11.3-py3.6-gcc7-bazel-test-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: linux.large
timeout-minutes: 240
env:
IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == 'unassigned') && (github.event.assignee.login == 'pytorchbot') }}
LABEL_CONDITIONS: ${{ contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/bazel') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/trunk') }}
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
if: ${{ (github.repository == 'pytorch/pytorch') && (
(github.event_name == 'push') ||
(github.event_name == 'schedule') ||
(contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/bazel') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux')) || contains(github.event.pull_request.labels.*.name, 'ciflow/trunk') ||
((github.event_name == 'pull_request' && github.event.action != 'unassigned') && !contains(join(github.event.pull_request.labels.*.name), 'ciflow/')))
}}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
# building and testing in a single job since bazel runs only small subset of tests
build-and-test:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
JOB_BASE_NAME: linux-xenial-cuda11.3-py3.6-gcc7-bazel-test-build-and-test
NUM_TEST_SHARDS: 1
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_SKIP_S3_UPLOAD: 1
working-directory: .circleci/docker
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
./build_docker.sh
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- name: Output disk space left
run: |
sudo df -H
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e PR_LABELS \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Test
# Time out the test phase after 3.5 hours
timeout-minutes: 210
run: |
# detached container should get cleaned up by teardown_ec2_linux
export SHARD_NUMBER=0
# TODO: Stop building test binaries as part of the build phase
# Make sure we copy test results from bazel-testlogs symlink to
# a regular directory ./test/test-reports
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e SHARD_NUMBER \
-e NUM_TEST_SHARDS \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/test.sh && cp -Lr ./bazel-testlogs ./test/test-reports'
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: 'bazel-${{ github.job }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: 'bazel-${{ github.job }}'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-cuda11.3-py3.6-gcc7-bazel-test-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af


@ -207,11 +207,10 @@ if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
get_bazel
# first build the whole torch for CPU-only
# first build torch for CPU-only
tools/bazel build --config=no-tty :torch
# then build selected set of targets with GPU-support.
# TODO: eventually this should converge to building the whole :torch with GPU-support
tools/bazel build --config=no-tty --config=gpu //c10
# then build everything with CUDA
tools/bazel build --config=no-tty --config=gpu :all
else
# check that setup.py would fail with bad arguments
echo "The next three invocations are expected to fail with invalid command error messages."


@ -3,7 +3,7 @@ load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
load("@rules_proto//proto:defs.bzl", "proto_library")
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_proto_library", "cc_test")
load("//third_party:substitution.bzl", "header_template_rule")
load("//:tools/build_variables.bzl", "torch_cpp_srcs", "libtorch_python_core_sources", "libtorch_core_sources", "libtorch_distributed_sources", "libtorch_extra_sources", "jit_core_sources")
load("//:tools/build_variables.bzl", "jit_core_sources", "libtorch_core_sources", "libtorch_cuda_sources", "libtorch_distributed_sources", "libtorch_extra_sources", "libtorch_nvfuser_generated_headers", "libtorch_nvfuser_runtime_sources", "libtorch_python_core_sources", "torch_cpp_srcs")
load("//tools/rules:cu.bzl", "cu_library")
load("//tools/config:defs.bzl", "if_cuda")
load("//:aten.bzl", "intern_build_aten_ops", "generate_aten")
@ -15,6 +15,7 @@ COMMON_COPTS = [
"-DHAVE_SHM_UNLINK=1",
"-D_FILE_OFFSET_BITS=64",
"-DHAVE_GCC_GET_CPUID",
"-DTH_BLAS_MKL",
"-DUSE_GCC_GET_CPUID",
"-DTH_HAVE_THREAD",
"-DUSE_FBGEMM",
@ -37,11 +38,11 @@ py_binary(
],
)
aten_generation_srcs = ["aten/src/ATen/native/native_functions.yaml"] + glob(["aten/src/ATen/templates/**"])
generate_aten(
name = "generated_cpp",
srcs = [
"aten/src/ATen/native/native_functions.yaml",
] + glob(["aten/src/ATen/templates/**"]),
srcs = aten_generation_srcs,
outs = [
"aten/src/ATen/Declarations.yaml",
"aten/src/ATen/RegisterBackendSelect.cpp",
@ -62,8 +63,6 @@ generate_aten(
"aten/src/ATen/RegisterSchema.cpp",
"aten/src/ATen/CPUFunctions.h",
"aten/src/ATen/CPUFunctions_inl.h",
"aten/src/ATen/CUDAFunctions.h",
"aten/src/ATen/CUDAFunctions_inl.h",
"aten/src/ATen/CompositeExplicitAutogradFunctions.h",
"aten/src/ATen/CompositeExplicitAutogradFunctions_inl.h",
"aten/src/ATen/CompositeImplicitAutogradFunctions.h",
@ -82,6 +81,8 @@ generate_aten(
"aten/src/ATen/MetaFunctions.h",
"aten/src/ATen/MetaFunctions_inl.h",
"aten/src/ATen/MethodOperators.h",
"aten/src/ATen/NativeMetaFunctions.h",
"aten/src/ATen/RegistrationDeclarations.h",
"aten/src/ATen/core/TensorBody.h",
"aten/src/ATen/core/TensorMethods.cpp",
"aten/src/ATen/core/ATenOpList.cpp",
@ -89,6 +90,23 @@ generate_aten(
generator=":gen",
)
# this hack is due to https://github.com/bazelbuild/bazel/issues/281
# since `outs` cannot be configured with if_cuda, we rerun the same command and declare cuda related files separately here.
genrule(
name = "generated_cuda_cpp",
srcs = aten_generation_srcs,
outs = [
"aten/src/ATen/CUDAFunctions.h",
"aten/src/ATen/CUDAFunctions_inl.h",
"aten/src/ATen/RegisterCUDA.cpp",
"aten/src/ATen/RegisterQuantizedCUDA.cpp",
"aten/src/ATen/RegisterSparseCUDA.cpp",
"aten/src/ATen/RegisterSparseCsrCUDA.cpp",
],
cmd = "$(location :gen) --source-path `dirname $(location aten/src/ATen/native/native_functions.yaml)`/.. --install_dir `dirname $(location aten/src/ATen/RegisterCUDA.cpp)`",
tools = [":gen"],
)
py_library(
name = "tools_codegen",
srcs = glob(["tools/codegen/**/*.py"]),
@ -230,7 +248,7 @@ filegroup(
filegroup(
name = "aten_native_mkl_cpp",
srcs = glob(["aten/src/ATen/native/mkl/*.cpp"]),
srcs = glob(["aten/src/ATen/native/mkl/*.cpp", "aten/src/ATen/mkl/*.cpp"]),
)
filegroup(
@ -266,135 +284,40 @@ filegroup(
)
filegroup(
name = "aten_cuda_srcs",
srcs = [
"aten/src/ATen/cuda/CUDABlas.cpp",
"aten/src/ATen/cuda/CUDASolver.cpp",
"aten/src/ATen/cuda/CUDAContext.cpp",
"aten/src/ATen/cuda/CUDAGeneratorImpl.cpp",
"aten/src/ATen/cuda/CUDAGraph.cpp",
"aten/src/ATen/cuda/CuSparseHandlePool.cpp",
"aten/src/ATen/cuda/CublasHandlePool.cpp",
"aten/src/ATen/cuda/CusolverDnHandlePool.cpp",
"aten/src/ATen/cuda/PinnedMemoryAllocator.cpp",
"aten/src/ATen/cuda/detail/CUDAHooks.cpp",
"aten/src/ATen/cudnn/AutocastRNN.cpp",
"aten/src/ATen/cudnn/Descriptors.cpp",
"aten/src/ATen/cudnn/Handle.cpp",
"aten/src/ATen/cudnn/Types.cpp",
"aten/src/ATen/native/cuda/CUDAUnaryOps.cpp",
"aten/src/ATen/native/cuda/TensorShapeCUDA.cpp",
"aten/src/ATen/native/cudnn/AffineGridGenerator.cpp",
"aten/src/ATen/native/cudnn/BatchNorm.cpp",
"aten/src/ATen/native/cudnn/Conv.cpp",
"aten/src/ATen/native/cudnn/GridSampler.cpp",
"aten/src/ATen/native/cudnn/LossCTC.cpp",
"aten/src/ATen/native/cudnn/RNN.cpp",
"aten/src/ATen/native/miopen/BatchNorm_miopen.cpp",
"aten/src/ATen/native/miopen/Conv_miopen.cpp",
"aten/src/ATen/native/miopen/RNN_miopen.cpp",
"aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp",
"aten/src/ATen/native/sparse/cuda/SparseBlas.cpp",
"aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp",
],
name = "aten_cuda_cpp_srcs",
srcs = glob(
[
"aten/src/ATen/cuda/*.cpp",
"aten/src/ATen/cuda/detail/*.cpp",
"aten/src/ATen/cudnn/*.cpp",
"aten/src/ATen/native/cuda/*.cpp",
"aten/src/ATen/native/cudnn/*.cpp",
"aten/src/ATen/native/miopen/*.cpp",
"aten/src/ATen/native/sparse/cuda/*.cpp",
"aten/src/THC/*.cpp",
],
),
)
filegroup(
name = "aten_srcs_cu",
srcs = [
"aten/src/ATen/cuda/cub.cu.cc",
"aten/src/ATen/cuda/detail/IndexUtils.cu.cc",
"aten/src/ATen/cuda/detail/CUDAGraphsUtils.cu.cc",
"aten/src/ATen/native/cuda/Activation.cu.cc",
"aten/src/ATen/native/cuda/AdaptiveAveragePooling.cu.cc",
"aten/src/ATen/native/cuda/AdaptiveAveragePooling3d.cu.cc",
"aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu.cc",
"aten/src/ATen/native/cuda/AdaptiveMaxPooling3d.cu.cc",
"aten/src/ATen/native/cuda/AveragePool2d.cu.cc",
"aten/src/ATen/native/cuda/AveragePool3d.cu.cc",
"aten/src/ATen/native/cuda/BatchLinearAlgebra.cu.cc",
"aten/src/ATen/native/cuda/BatchLinearAlgebraLib.cu.cc",
"aten/src/ATen/native/cuda/BinaryArithmeticKernel.cu.cc",
"aten/src/ATen/native/cuda/BinaryCompareKernel.cu.cc",
"aten/src/ATen/native/cuda/BinaryMiscOpsKernels.cu.cc",
"aten/src/ATen/native/cuda/CUDAScalar.cu.cc",
"aten/src/ATen/native/cuda/Col2Im.cu.cc",
"aten/src/ATen/native/cuda/Copy.cu.cc",
"aten/src/ATen/native/cuda/CrossKernel.cu.cc",
"aten/src/ATen/native/cuda/DilatedMaxPool2d.cu.cc",
"aten/src/ATen/native/cuda/DilatedMaxPool3d.cu.cc",
"aten/src/ATen/native/cuda/DistanceKernel.cu.cc",
"aten/src/ATen/native/cuda/Distributions.cu.cc",
"aten/src/ATen/native/cuda/Dropout.cu.cc",
"aten/src/ATen/native/cuda/Embedding.cu.cc",
"aten/src/ATen/native/cuda/EmbeddingBackwardKernel.cu.cc",
"aten/src/ATen/native/cuda/EmbeddingBag.cu.cc",
"aten/src/ATen/native/cuda/FillKernel.cu.cc",
"aten/src/ATen/native/cuda/FractionalMaxPool2d.cu.cc",
"aten/src/ATen/native/cuda/FractionalMaxPool3d.cu.cc",
"aten/src/ATen/native/cuda/GridSampler.cu.cc",
"aten/src/ATen/native/cuda/Im2Col.cu.cc",
"aten/src/ATen/native/cuda/IndexKernel.cu.cc",
"aten/src/ATen/native/cuda/Indexing.cu.cc",
"aten/src/ATen/native/cuda/Lerp.cu.cc",
"aten/src/ATen/native/cuda/LinearAlgebra.cu.cc",
"aten/src/ATen/native/cuda/Loss.cu.cc",
"aten/src/ATen/native/cuda/LossCTC.cu.cc",
"aten/src/ATen/native/cuda/MaxUnpooling.cu.cc",
"aten/src/ATen/native/cuda/MultinomialKernel.cu.cc",
"aten/src/ATen/native/cuda/MultiLabelMarginCriterion.cu.cc",
"aten/src/ATen/native/cuda/NaiveConvolutionTranspose2d.cu.cc",
"aten/src/ATen/native/cuda/NaiveConvolutionTranspose3d.cu.cc",
"aten/src/ATen/native/cuda/NaiveDilatedConvolution.cu.cc",
"aten/src/ATen/native/cuda/NLLLoss2d.cu.cc",
"aten/src/ATen/native/cuda/Normalization.cu.cc",
"aten/src/ATen/native/cuda/PointwiseOpsKernel.cu.cc",
"aten/src/ATen/native/cuda/PowKernel.cu.cc",
"aten/src/ATen/native/cuda/RNN.cu.cc",
"aten/src/ATen/native/cuda/RangeFactories.cu.cc",
"aten/src/ATen/native/cuda/Reduce.cu.cc",
"aten/src/ATen/native/cuda/ReduceOpsKernel.cu.cc",
"aten/src/ATen/native/cuda/ReflectionPad.cu.cc",
"aten/src/ATen/native/cuda/Repeat.cu.cc",
"aten/src/ATen/native/cuda/ReplicationPadding.cu.cc",
"aten/src/ATen/native/cuda/Resize.cu.cc",
"aten/src/ATen/native/cuda/SegmentReduce.cu.cc",
"aten/src/ATen/native/cuda/SoftMax.cu.cc",
"aten/src/ATen/native/cuda/SortingKthValue.cu.cc",
"aten/src/ATen/native/cuda/SparseMM.cu.cc",
"aten/src/ATen/native/cuda/SpectralOps.cu.cc",
"aten/src/ATen/native/cuda/SummaryOps.cu.cc",
"aten/src/ATen/native/cuda/TensorCompare.cu.cc",
"aten/src/ATen/native/cuda/TensorFactories.cu.cc",
"aten/src/ATen/native/cuda/TensorTopK.cu.cc",
"aten/src/ATen/native/cuda/TensorTransformations.cu.cc",
"aten/src/ATen/native/cuda/TriangularOps.cu.cc",
"aten/src/ATen/native/cuda/UnaryOpsKernel.cu.cc",
"aten/src/ATen/native/cuda/UnarySpecialOpsKernel.cu.cc",
"aten/src/ATen/native/cuda/Unique.cu.cc",
"aten/src/ATen/native/cuda/UpSampleBicubic2d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleBilinear2d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleLinear1d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleNearest1d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleNearest2d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleNearest3d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleTrilinear3d.cu.cc",
"aten/src/ATen/native/cuda/WeightNorm.cu.cc",
"aten/src/ATen/native/cuda/layer_norm_kernel.cu.cc",
"aten/src/ATen/native/quantized/cuda/fake_quantize_core.cu.cc",
"aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu.cc",
"aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu.cc",
"aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu.cc",
],
name = "aten_cu_srcs",
srcs = glob([
"aten/src/ATen/cuda/*.cu",
"aten/src/ATen/cuda/detail/*.cu",
"aten/src/ATen/native/cuda/*.cu",
"aten/src/ATen/native/quantized/cuda/*.cu",
"aten/src/ATen/native/sparse/cuda/*.cu",
]),
)
header_template_rule(
name = "aten_src_ATen_config",
src = "aten/src/ATen/Config.h.in",
out = "aten/src/ATen/Config.h",
include = "aten/src",
substitutions = {
"@AT_MKLDNN_ENABLED@": "1",
"@AT_MKL_ENABLED@": "0",
"@AT_MKL_ENABLED@": "1",
"@AT_FFTW_ENABLED@": "0",
"@AT_POCKETFFT_ENABLED@": "0",
"@AT_NNPACK_ENABLED@": "0",
@ -413,6 +336,7 @@ header_template_rule(
name = "aten_src_ATen_cuda_config",
src = "aten/src/ATen/cuda/CUDAConfig.h.in",
out = "aten/src/ATen/cuda/CUDAConfig.h",
include = "aten/src",
substitutions = {
"@AT_CUDNN_ENABLED@": "1",
"@AT_ROCM_ENABLED@": "0",
@ -429,18 +353,19 @@ cc_library(
] + glob([
"aten/src/**/*.h",
"aten/src/**/*.hpp",
"aten/src/ATen/cuda/**/*.cuh",
"aten/src/ATen/native/**/*.cuh",
"aten/src/TH/**/*.cpp",
"aten/src/THC/*.cuh",
"aten/src/THC/generic/*.cu",
],
exclude = [
"aten/src/ATen/Config.h",
],) + [
":generated_cpp",
) + [
":aten_src_ATen_config",
":generated_cpp",
":generated_cuda_cpp",
],
includes = [
"aten/src",
"aten/src/TH",
],
deps = [
"//c10:headers",
@ -464,6 +389,7 @@ intern_build_aten_ops(
":aten_headers",
"@sleef",
"@fbgemm",
"@mkl",
],
)
@ -530,12 +456,17 @@ cc_binary(
cc_library(
name = "aten_cuda_cpp",
srcs = [":aten_cuda_srcs"],
srcs = [
":aten_cuda_cpp_srcs",
":generated_cuda_cpp",
],
hdrs = [":aten_src_ATen_cuda_config"],
copts = ATEN_COPTS,
visibility = ["//visibility:public"],
deps = [
":aten",
"@cuda",
"@cuda//:cusolver",
"@cuda//:nvrtc",
"@cudnn",
],
@ -552,9 +483,7 @@ torch_cuda_half_options = [
cu_library(
name = "aten_cuda",
srcs = [
":aten_srcs_cu",
],
srcs = [":aten_cu_srcs"],
copts = ATEN_COPTS + torch_cuda_half_options,
visibility = ["//visibility:public"],
deps = [
@ -618,6 +547,7 @@ header_template_rule(
filegroup(
name = "caffe2_contrib_srcs",
srcs = [
"caffe2/contrib/aten/aten_op.cc",
"caffe2/contrib/gloo/allgather_ops.cc",
"caffe2/contrib/gloo/allreduce_ops.cc",
"caffe2/contrib/gloo/barrier_ops.cc",
@ -787,6 +717,7 @@ filegroup(
"caffe2/operators/conv_op_eigen.cc",
"caffe2/operators/conv_op_shared.cc",
"caffe2/operators/conv_transpose_gradient_op.cc",
"caffe2/operators/conv_transpose_op.cc",
"caffe2/operators/conv_transpose_op_mobile.cc",
"caffe2/operators/copy_op.cc",
"caffe2/operators/copy_rows_to_tensor_op.cc",
@ -1182,7 +1113,7 @@ filegroup(
)
filegroup(
name = "caffe2_cuda_srcs",
name = "caffe2_cuda_cpp_srcs",
srcs = [
"caffe2/contrib/aten/aten_op_gpu.cc",
"caffe2/contrib/gloo/allreduce_ops_gpu.cc",
@ -1251,155 +1182,155 @@ filegroup(
filegroup(
name = "caffe2_cu_srcs",
srcs = [
"caffe2/core/context_gpu.cu.cc",
"caffe2/operators/abs_op.cu.cc",
"caffe2/operators/accumulate_op.cu.cc",
"caffe2/operators/accuracy_op.cu.cc",
"caffe2/operators/acos_op.cu.cc",
"caffe2/operators/affine_channel_op.cu.cc",
"caffe2/operators/alias_with_name.cu.cc",
"caffe2/operators/arg_ops.cu.cc",
"caffe2/operators/asin_op.cu.cc",
"caffe2/operators/assert_op.cu.cc",
"caffe2/operators/atan_op.cu.cc",
"caffe2/operators/batch_gather_ops.cu.cc",
"caffe2/operators/batch_matmul_op.cu.cc",
"caffe2/operators/batch_moments_op.cu.cc",
"caffe2/operators/batch_permutation_op.cu.cc",
"caffe2/operators/batch_sparse_to_dense_op.cu.cc",
"caffe2/operators/boolean_mask_ops.cu.cc",
"caffe2/operators/boolean_unmask_ops.cu.cc",
"caffe2/operators/bucketize_op.cu.cc",
"caffe2/operators/cast_op.cu.cc",
"caffe2/operators/cbrt_op.cu.cc",
"caffe2/operators/ceil_op.cu.cc",
"caffe2/operators/channel_backprop_stats_op.cu.cc",
"caffe2/operators/channel_shuffle_op.cu.cc",
"caffe2/operators/channel_stats_op.cu.cc",
"caffe2/operators/channelwise_conv3d_op_cudnn.cu.cc",
"caffe2/operators/clip_op.cu.cc",
"caffe2/operators/copy_op.cu.cc",
"caffe2/operators/cos_op.cu.cc",
"caffe2/operators/cosh_op.cu.cc",
"caffe2/operators/cosine_embedding_criterion_op.cu.cc",
"caffe2/operators/cross_entropy_op.cu.cc",
"caffe2/operators/cube_op.cu.cc",
"caffe2/operators/data_couple_gpu.cu.cc",
"caffe2/operators/deform_conv_op.cu.cc",
"caffe2/operators/depthwise_3x3_conv_op_cudnn.cu.cc",
"caffe2/operators/distance_op.cu.cc",
"caffe2/operators/dropout_op.cu.cc",
"caffe2/operators/elementwise_div_op.cu.cc",
"caffe2/operators/elementwise_linear_op.cu.cc",
"caffe2/operators/elementwise_mul_op.cu.cc",
"caffe2/operators/elementwise_ops.cu.cc",
"caffe2/operators/elu_op.cu.cc",
"caffe2/operators/enforce_finite_op.cu.cc",
"caffe2/operators/ensure_cpu_output_op.cu.cc",
"caffe2/operators/erf_op.cu.cc",
"caffe2/operators/filler_op.cu.cc",
"caffe2/operators/find_op.cu.cc",
"caffe2/operators/floor_op.cu.cc",
"caffe2/operators/gather_op.cu.cc",
"caffe2/operators/gelu_op.cu.cc",
"caffe2/operators/generate_proposals_op.cu.cc",
"caffe2/operators/generate_proposals_op_util_nms_gpu.cu.cc",
"caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.cu.cc",
"caffe2/operators/given_tensor_fill_op.cu.cc",
"caffe2/operators/glu_op.cu.cc",
"caffe2/operators/group_norm_op.cu.cc",
"caffe2/operators/gru_unit_op_gpu.cu.cc",
"caffe2/operators/half_float_ops.cu.cc",
"caffe2/operators/hard_sigmoid_op.cu.cc",
"caffe2/operators/instance_norm_op.cu.cc",
"caffe2/operators/integral_image_op.cu.cc",
"caffe2/operators/layer_norm_op.cu.cc",
"caffe2/operators/leaky_relu_op.cu.cc",
"caffe2/operators/lengths_pad_op.cu.cc",
"caffe2/operators/lengths_tile_op.cu.cc",
"caffe2/operators/local_response_normalization_op.cu.cc",
"caffe2/operators/logit_op.cu.cc",
"caffe2/operators/loss_op.cu.cc",
"caffe2/operators/lp_pool_op.cu.cc",
"caffe2/operators/lstm_unit_op_gpu.cu.cc",
"caffe2/operators/margin_ranking_criterion_op.cu.cc",
"caffe2/operators/max_pool_with_index.cu.cc",
"caffe2/operators/mean_op.cu.cc",
"caffe2/operators/mem_query_op.cu.cc",
"caffe2/operators/minmax_ops.cu.cc",
"caffe2/operators/moments_op.cu.cc",
"caffe2/operators/multi_class_accuracy_op.cu.cc",
"caffe2/operators/normalize_ops.cu.cc",
"caffe2/operators/one_hot_ops.cu.cc",
"caffe2/operators/pack_segments.cu.cc",
"caffe2/operators/pad_op_gpu.cu.cc",
"caffe2/operators/perplexity_op.cu.cc",
"caffe2/operators/piecewise_linear_transform_op.cu.cc",
"caffe2/operators/pool_op.cu.cc",
"caffe2/operators/pow_op.cu.cc",
"caffe2/operators/prelu_op.cu.cc",
"caffe2/operators/reciprocal_op.cu.cc",
"caffe2/operators/reduce_front_back_max_ops.cu.cc",
"caffe2/operators/reduce_front_back_sum_mean_ops.cu.cc",
"caffe2/operators/reduce_ops.cu.cc",
"caffe2/operators/reduction_ops.cu.cc",
"caffe2/operators/relu_n_op.cu.cc",
"caffe2/operators/relu_op.cu.cc",
"caffe2/operators/replace_nan_op.cu.cc",
"caffe2/operators/resize_3d_op.cu.cc",
"caffe2/operators/resize_op.cu.cc",
"caffe2/operators/reverse_packed_segs_op.cu.cc",
"caffe2/operators/rmac_regions_op.cu.cc",
"caffe2/operators/rnn/recurrent_network_op_gpu.cu.cc",
"caffe2/operators/roi_align_gradient_op.cu.cc",
"caffe2/operators/roi_align_op.cu.cc",
"caffe2/operators/roi_align_rotated_gradient_op.cu.cc",
"caffe2/operators/roi_align_rotated_op.cu.cc",
"caffe2/operators/roi_pool_op.cu.cc",
"caffe2/operators/rsqrt_op.cu.cc",
"caffe2/operators/scale_blobs_op.cu.cc",
"caffe2/operators/segment_reduction_op_gpu.cu.cc",
"caffe2/operators/selu_op.cu.cc",
"caffe2/operators/sequence_ops.cu.cc",
"caffe2/operators/sigmoid_op.cu.cc",
"caffe2/operators/sin_op.cu.cc",
"caffe2/operators/sinh_op.cu.cc",
"caffe2/operators/slice_op.cu.cc",
"caffe2/operators/softmax_ops.cu.cc",
"caffe2/operators/softplus_op.cu.cc",
"caffe2/operators/softsign_op.cu.cc",
"caffe2/operators/space_batch_op_gpu.cu.cc",
"caffe2/operators/sparse_normalize_op_gpu.cu.cc",
"caffe2/operators/sparse_to_dense_op.cu.cc",
"caffe2/operators/spatial_batch_norm_op.cu.cc",
"caffe2/operators/spatial_batch_norm_op_cudnn.cu.cc",
"caffe2/operators/stump_func_op.cu.cc",
"caffe2/operators/summarize_op.cu.cc",
"caffe2/operators/swish_op.cu.cc",
"caffe2/operators/tan_op.cu.cc",
"caffe2/operators/tanh_op.cu.cc",
"caffe2/operators/thresholded_relu_op.cu.cc",
"caffe2/operators/tile_op.cu.cc",
"caffe2/operators/top_k.cu.cc",
"caffe2/operators/transpose_op.cu.cc",
"caffe2/operators/unique_ops.cu.cc",
"caffe2/operators/upsample_op.cu.cc",
"caffe2/operators/utility_ops.cu.cc",
"caffe2/operators/weighted_sample_op.cu.cc",
"caffe2/sgd/adadelta_op_gpu.cu.cc",
"caffe2/sgd/adagrad_op_gpu.cu.cc",
"caffe2/sgd/adam_op_gpu.cu.cc",
"caffe2/sgd/fp16_momentum_sgd_op.cu.cc",
"caffe2/sgd/fp32_momentum_sgd_op.cu.cc",
"caffe2/sgd/lars_op_gpu.cu.cc",
"caffe2/sgd/momentum_sgd_op_gpu.cu.cc",
"caffe2/sgd/rmsprop_op_gpu.cu.cc",
"caffe2/sgd/yellowfin_op_gpu.cu.cc",
"caffe2/utils/math/broadcast.cu.cc",
"caffe2/utils/math/elementwise.cu.cc",
"caffe2/utils/math/reduce.cu.cc",
"caffe2/utils/math/transpose.cu.cc",
"caffe2/utils/math_gpu.cu.cc",
"caffe2/core/context_gpu.cu",
"caffe2/operators/abs_op.cu",
"caffe2/operators/accumulate_op.cu",
"caffe2/operators/accuracy_op.cu",
"caffe2/operators/acos_op.cu",
"caffe2/operators/affine_channel_op.cu",
"caffe2/operators/alias_with_name.cu",
"caffe2/operators/arg_ops.cu",
"caffe2/operators/asin_op.cu",
"caffe2/operators/assert_op.cu",
"caffe2/operators/atan_op.cu",
"caffe2/operators/batch_gather_ops.cu",
"caffe2/operators/batch_matmul_op.cu",
"caffe2/operators/batch_moments_op.cu",
"caffe2/operators/batch_permutation_op.cu",
"caffe2/operators/batch_sparse_to_dense_op.cu",
"caffe2/operators/boolean_mask_ops.cu",
"caffe2/operators/boolean_unmask_ops.cu",
"caffe2/operators/bucketize_op.cu",
"caffe2/operators/cast_op.cu",
"caffe2/operators/cbrt_op.cu",
"caffe2/operators/ceil_op.cu",
"caffe2/operators/channel_backprop_stats_op.cu",
"caffe2/operators/channel_shuffle_op.cu",
"caffe2/operators/channel_stats_op.cu",
"caffe2/operators/channelwise_conv3d_op_cudnn.cu",
"caffe2/operators/clip_op.cu",
"caffe2/operators/copy_op.cu",
"caffe2/operators/cos_op.cu",
"caffe2/operators/cosh_op.cu",
"caffe2/operators/cosine_embedding_criterion_op.cu",
"caffe2/operators/cross_entropy_op.cu",
"caffe2/operators/cube_op.cu",
"caffe2/operators/data_couple_gpu.cu",
"caffe2/operators/deform_conv_op.cu",
"caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
"caffe2/operators/distance_op.cu",
"caffe2/operators/dropout_op.cu",
"caffe2/operators/elementwise_div_op.cu",
"caffe2/operators/elementwise_linear_op.cu",
"caffe2/operators/elementwise_mul_op.cu",
"caffe2/operators/elementwise_ops.cu",
"caffe2/operators/elu_op.cu",
"caffe2/operators/enforce_finite_op.cu",
"caffe2/operators/ensure_cpu_output_op.cu",
"caffe2/operators/erf_op.cu",
"caffe2/operators/filler_op.cu",
"caffe2/operators/find_op.cu",
"caffe2/operators/floor_op.cu",
"caffe2/operators/gather_op.cu",
"caffe2/operators/gelu_op.cu",
"caffe2/operators/generate_proposals_op.cu",
"caffe2/operators/generate_proposals_op_util_nms_gpu.cu",
"caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.cu",
"caffe2/operators/given_tensor_fill_op.cu",
"caffe2/operators/glu_op.cu",
"caffe2/operators/group_norm_op.cu",
"caffe2/operators/gru_unit_op_gpu.cu",
"caffe2/operators/half_float_ops.cu",
"caffe2/operators/hard_sigmoid_op.cu",
"caffe2/operators/instance_norm_op.cu",
"caffe2/operators/integral_image_op.cu",
"caffe2/operators/layer_norm_op.cu",
"caffe2/operators/leaky_relu_op.cu",
"caffe2/operators/lengths_pad_op.cu",
"caffe2/operators/lengths_tile_op.cu",
"caffe2/operators/local_response_normalization_op.cu",
"caffe2/operators/logit_op.cu",
"caffe2/operators/loss_op.cu",
"caffe2/operators/lp_pool_op.cu",
"caffe2/operators/lstm_unit_op_gpu.cu",
"caffe2/operators/margin_ranking_criterion_op.cu",
"caffe2/operators/max_pool_with_index.cu",
"caffe2/operators/mean_op.cu",
"caffe2/operators/mem_query_op.cu",
"caffe2/operators/minmax_ops.cu",
"caffe2/operators/moments_op.cu",
"caffe2/operators/multi_class_accuracy_op.cu",
"caffe2/operators/normalize_ops.cu",
"caffe2/operators/one_hot_ops.cu",
"caffe2/operators/pack_segments.cu",
"caffe2/operators/pad_op_gpu.cu",
"caffe2/operators/perplexity_op.cu",
"caffe2/operators/piecewise_linear_transform_op.cu",
"caffe2/operators/pool_op.cu",
"caffe2/operators/pow_op.cu",
"caffe2/operators/prelu_op.cu",
"caffe2/operators/reciprocal_op.cu",
"caffe2/operators/reduce_front_back_max_ops.cu",
"caffe2/operators/reduce_front_back_sum_mean_ops.cu",
"caffe2/operators/reduce_ops.cu",
"caffe2/operators/reduction_ops.cu",
"caffe2/operators/relu_n_op.cu",
"caffe2/operators/relu_op.cu",
"caffe2/operators/replace_nan_op.cu",
"caffe2/operators/resize_3d_op.cu",
"caffe2/operators/resize_op.cu",
"caffe2/operators/reverse_packed_segs_op.cu",
"caffe2/operators/rmac_regions_op.cu",
"caffe2/operators/rnn/recurrent_network_op_gpu.cu",
"caffe2/operators/roi_align_gradient_op.cu",
"caffe2/operators/roi_align_op.cu",
"caffe2/operators/roi_align_rotated_gradient_op.cu",
"caffe2/operators/roi_align_rotated_op.cu",
"caffe2/operators/roi_pool_op.cu",
"caffe2/operators/rsqrt_op.cu",
"caffe2/operators/scale_blobs_op.cu",
"caffe2/operators/segment_reduction_op_gpu.cu",
"caffe2/operators/selu_op.cu",
"caffe2/operators/sequence_ops.cu",
"caffe2/operators/sigmoid_op.cu",
"caffe2/operators/sin_op.cu",
"caffe2/operators/sinh_op.cu",
"caffe2/operators/slice_op.cu",
"caffe2/operators/softmax_ops.cu",
"caffe2/operators/softplus_op.cu",
"caffe2/operators/softsign_op.cu",
"caffe2/operators/space_batch_op_gpu.cu",
"caffe2/operators/sparse_normalize_op_gpu.cu",
"caffe2/operators/sparse_to_dense_op.cu",
"caffe2/operators/spatial_batch_norm_op.cu",
"caffe2/operators/spatial_batch_norm_op_cudnn.cu",
"caffe2/operators/stump_func_op.cu",
"caffe2/operators/summarize_op.cu",
"caffe2/operators/swish_op.cu",
"caffe2/operators/tan_op.cu",
"caffe2/operators/tanh_op.cu",
"caffe2/operators/thresholded_relu_op.cu",
"caffe2/operators/tile_op.cu",
"caffe2/operators/top_k.cu",
"caffe2/operators/transpose_op.cu",
"caffe2/operators/unique_ops.cu",
"caffe2/operators/upsample_op.cu",
"caffe2/operators/utility_ops.cu",
"caffe2/operators/weighted_sample_op.cu",
"caffe2/sgd/adadelta_op_gpu.cu",
"caffe2/sgd/adagrad_op_gpu.cu",
"caffe2/sgd/adam_op_gpu.cu",
"caffe2/sgd/fp16_momentum_sgd_op.cu",
"caffe2/sgd/fp32_momentum_sgd_op.cu",
"caffe2/sgd/lars_op_gpu.cu",
"caffe2/sgd/momentum_sgd_op_gpu.cu",
"caffe2/sgd/rmsprop_op_gpu.cu",
"caffe2/sgd/yellowfin_op_gpu.cu",
"caffe2/utils/math/broadcast.cu",
"caffe2/utils/math/elementwise.cu",
"caffe2/utils/math/reduce.cu",
"caffe2/utils/math/transpose.cu",
"caffe2/utils/math_gpu.cu",
],
)
@ -1432,6 +1363,29 @@ cc_library(
],
)
py_binary(
name = "gen_op",
srcs = ["caffe2/contrib/aten/gen_op.py"],
deps = [":tools_codegen"],
)
genrule(
name = "generated_caffe2_aten_op_headers",
srcs = [
"caffe2/contrib/aten/aten_op_template.h",
"aten/src/ATen/Declarations.yaml",
],
outs = ["caffe2/caffe2/contrib/aten/gen_aten_op.h"],
cmd = """
$(location :gen_op) \
--output_prefix gen_ \
--install_dir $(@D) \
--aten_root `dirname $(location aten/src/ATen/Declarations.yaml)`/../.. \
--template_dir `dirname $(location caffe2/contrib/aten/aten_op_template.h)` \
--yaml_dir `dirname $(location aten/src/ATen/Declarations.yaml)`""",
tools = [":gen_op"],
)
cc_library(
name = "caffe2_headers",
hdrs = glob([
@ -1472,7 +1426,7 @@ cc_library(
]) + if_cuda(glob([
"caffe2/**/*.cuh",
"caffe2/image/*.h",
])),
])) + [":generated_caffe2_aten_op_headers"],
copts = CAFFE2_COPTS,
includes = [
"caffe2/contrib/aten",
@ -1554,7 +1508,7 @@ cc_library(
"@fmt",
] + if_cuda(
[
":caffe2_cpp_cuda",
":caffe2_cuda_cpp",
":aten_cuda",
"@tensorpipe//:tensorpipe_cuda",
],
@ -1567,8 +1521,8 @@ cc_library(
)
cc_library(
name = "caffe2_cpp_cuda",
srcs = [":caffe2_cuda_srcs"],
name = "caffe2_cuda_cpp",
srcs = [":caffe2_cuda_cpp_srcs"],
copts = CAFFE2_COPTS,
visibility = ["//visibility:public"],
deps = [
@ -1586,7 +1540,6 @@ cu_library(
deps = [
":aten",
":caffe2_headers",
"@cub",
"@cuda//:cublas",
"@cuda//:curand",
"@cudnn",
@ -1610,6 +1563,7 @@ PERF_COPTS = [
"-DHAVE_SHM_OPEN=1",
"-DHAVE_SHM_UNLINK=1",
"-DSLEEF_STATIC_LIBS=1",
"-DTH_BALS_MKL",
"-D_FILE_OFFSET_BITS=64",
"-DUSE_FBGEMM",
"-fvisibility-inlines-hidden",
@ -1693,10 +1647,29 @@ genrule(
srcs = ["torch/csrc/api/include/torch/version.h.in", "version.txt"],
outs = ["torch/csrc/api/include/torch/version.h"],
cmd = "$(location :gen_version_header) --template-path $(location torch/csrc/api/include/torch/version.h.in) --version-path $(location version.txt) --output-path $@",
tools = [':gen_version_header']
tools = [':gen_version_header'],
)
torch_cuda_headers = glob(["torch/csrc/cuda/*.h"])
py_binary(
name = "stringify_file",
srcs = ["torch/csrc/jit/codegen/cuda/tools/stringify_file.py"],
)
generated_nvfuser_hdrs = ["generated_" + hdr for hdr in libtorch_nvfuser_generated_headers]
[
genrule(
name = name,
srcs = [src],
outs = ["nvfuser_resources/{}".format(hdr)],
cmd = "$(location :stringify_file) -i $< -o $@",
tools = [":stringify_file"],
)
for name, src, hdr in zip(generated_nvfuser_hdrs, libtorch_nvfuser_runtime_sources, libtorch_nvfuser_generated_headers)
]
torch_cuda_headers = glob(["torch/csrc/cuda/*.h"]) + generated_nvfuser_hdrs
cc_library(
name = "torch_headers",
hdrs = if_cuda(
@ -1707,6 +1680,7 @@ cc_library(
"torch/csrc/**/*.h",
"torch/csrc/distributed/c10d/*.hpp",
"torch/lib/libshm/*.h",
"torch/csrc/generic/*.cpp",
],
exclude = [
"torch/csrc/autograd/generated/VariableType.h",
@ -1743,21 +1717,25 @@ TORCH_COPTS = COMMON_COPTS + [
"-fno-trapping-math",
]
cu_library(
name = "torch_distributed_cuda",
srcs = ["torch/csrc/distributed/c10d/quantization/quantization_gpu.cu"],
deps = [":torch_headers"],
)
cc_library(
name = "torch",
srcs = if_cuda(glob(
[
"torch/csrc/cuda/*.cpp",
"torch/csrc/autograd/functions/comm.cpp",
],
libtorch_cuda_sources,
exclude = [
"torch/csrc/cuda/python_nccl.cpp",
"torch/csrc/cuda/nccl.cpp",
"torch/csrc/distributed/c10d/quantization/quantization_gpu.cu",
],
)) + libtorch_core_sources + libtorch_distributed_sources + torch_cpp_srcs + libtorch_extra_sources + jit_core_sources + [
":cpp_generated_code",
],
copts = TORCH_COPTS + if_cuda(["-DUSE_CUDA=1"]),
copts = TORCH_COPTS,
defines = [
"CAFFE2_NIGHTLY_VERSION=20200115",
],
@ -1765,7 +1743,10 @@ cc_library(
deps = [
":caffe2",
":torch_headers",
],
] + if_cuda([
":torch_distributed_cuda",
"@cuda//:nvToolsExt",
]),
alwayslink = True,
)
@ -1783,10 +1764,9 @@ cc_library(
"**/*.h",
"**/*.cuh",
]) + [
":generated_code",
":cpp_generated_code",
],
includes = [
".",
"torch/csrc/api/include",
"torch/csrc/distributed",
"torch/lib",
@ -1794,21 +1774,17 @@ cc_library(
],
visibility = ["//visibility:public"],
deps = [
":aten_headers",
":caffe2_headers",
"//c10:headers",
":torch_headers",
],
)
cc_library(
name = "torch_python",
srcs = libtorch_python_core_sources + [":python_generated_code"],
hdrs = glob([
"torch/csrc/generic/*.cpp",
]),
deps = [
":torch",
":shm",
"@pybind11",
],
)
@ -1842,11 +1818,16 @@ cc_library(
# Torch integration tests rely on a labeled data set from the MNIST database.
# http://yann.lecun.com/exdb/mnist/
cpp_api_tests = glob(["test/cpp/api/*.cpp"])
# imethod.cpp is excluded since torch/csrc/deploy* build is not yet supported.
cpp_api_tests = glob(
["test/cpp/api/*.cpp"],
exclude = ["test/cpp/api/imethod.cpp"],
)
[
cc_test(
name = paths.split_extension(paths.basename(filename))[0].replace("-","_") + "_test",
size = "medium",
size = "medium",
srcs = [filename],
deps = [
":test_support",


@ -1,7 +1,22 @@
workspace(name = "pytorch")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("//tools/rules:workspace.bzl", "new_patched_local_repository", "new_empty_repository")
load("//tools/rules:workspace.bzl", "new_patched_local_repository")
http_archive(
name = "rules_cuda",
sha256 = "f80438bee9906e9ecb1a8a4ae2365374ac1e8a283897281a2db2fb7fcf746333",
strip_prefix = "runtime-b1c7cce21ba4661c17ac72421c6a0e2015e7bef3/third_party/rules_cuda",
urls = ["https://github.com/tensorflow/runtime/archive/b1c7cce21ba4661c17ac72421c6a0e2015e7bef3.tar.gz"],
)
load("@rules_cuda//cuda:dependencies.bzl", "rules_cuda_dependencies")
rules_cuda_dependencies()
load("@rules_cc//cc:repositories.bzl", "rules_cc_toolchains")
rules_cc_toolchains()
http_archive(
name = "bazel_skylib",
@ -171,13 +186,14 @@ load("@rules_python//python:repositories.bzl", "py_repositories")
py_repositories()
local_repository(
name = "local_config_cuda",
path = "third_party/tensorflow_cuda_bazel_build",
new_local_repository(
name = "cuda",
build_file = "@//third_party:cuda.BUILD",
path = "/usr/local/cuda",
)
# Wrapper to expose local_config_cuda in an agnostic way
new_empty_repository(
name = "cuda",
build_file = "//third_party:cuda.BUILD",
new_local_repository(
name = "cudnn",
build_file = "@//third_party:cudnn.BUILD",
path = "/usr/",
)

third_party/cuda.BUILD (vendored)

@ -1,43 +1,76 @@
"""
Collect all the CUDA stuff from @local_config_cuda in a single target
for convenience.
"""
# Adopted from: https://github.com/tensorflow/runtime/blob/master/third_party/rules_cuda/private/BUILD.local_cuda
# Library targets are created corresponding to BUILD.bazel's needs.
cc_library(
name = "cuda_headers",
hdrs = glob([
"include/**",
"targets/x86_64-linux/include/**",
]),
includes = [
"include",
"targets/x86_64-linux/include",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "cuda_driver",
srcs = ["lib64/stubs/libcuda.so"],
visibility = ["//visibility:public"],
)
cc_library(
name = "cuda",
srcs = ["targets/x86_64-linux/lib/libcudart.so"],
visibility = ["//visibility:public"],
deps = [
"@local_config_cuda//cuda:cublas",
"@local_config_cuda//cuda:cuda_driver",
"@local_config_cuda//cuda:cuda_headers",
"@local_config_cuda//cuda:cudart",
"@local_config_cuda//cuda:cufft",
"@local_config_cuda//cuda:curand",
],
deps = [":cuda_headers"],
)
cc_library(
name = "cupti",
deps = [
"@local_config_cuda//cuda:cupti_headers",
"@local_config_cuda//cuda:cupti_link",
],
name = "cufft",
srcs = ["targets/x86_64-linux/lib/libcufft.so"],
visibility = ["//visibility:public"],
)
[
alias(
name = lib,
actual = "@local_config_cuda//cuda:{}".format(lib),
visibility = ["//visibility:public"],
)
for lib in [
"cublas",
"cufft",
"cusolver",
"cusparse",
"curand",
"nvrtc",
"cuda_driver",
"nvToolsExt",
]
]
cc_library(
name = "cublas",
srcs = [
"targets/x86_64-linux/lib/libcublasLt.so",
"targets/x86_64-linux/lib/libcublas.so",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "curand",
srcs = ["targets/x86_64-linux/lib/libcurand.so"],
visibility = ["//visibility:public"],
)
cc_library(
name = "cusolver",
srcs = ["targets/x86_64-linux/lib/libcusolver.so"],
visibility = ["//visibility:public"],
)
cc_library(
name = "cusparse",
srcs = ["targets/x86_64-linux/lib/libcusparse.so"],
visibility = ["//visibility:public"],
)
cc_library(
name = "nvrtc",
srcs = [
"targets/x86_64-linux/lib/libnvrtc.so",
"targets/x86_64-linux/lib/libnvrtc-builtins.so",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "nvToolsExt",
srcs = [ "lib64/libnvToolsExt.so"],
visibility = ["//visibility:public"],
)

third_party/cudnn.BUILD (vendored, new file)

@ -0,0 +1,26 @@
# Adopted from: https://github.com/NVIDIA/TRTorch/blob/master/third_party/cudnn/local/BUILD
cc_library(
name = "cudnn_headers",
hdrs = ["include/cudnn.h"] + glob([
"include/cudnn+.h",
"include/cudnn_*.h",
]),
includes = ["include/"],
visibility = ["//visibility:private"],
)
cc_import(
name = "cudnn_lib",
shared_library = "lib/x86_64-linux-gnu/libcudnn.so",
visibility = ["//visibility:private"],
)
cc_library(
name = "cudnn",
visibility = ["//visibility:public"],
deps = [
"cudnn_headers",
"cudnn_lib",
],
)


@ -48,8 +48,8 @@ cc_library(
cu_library(
name = "gloo_cuda",
srcs = [
"gloo/cuda.cu.cc",
"gloo/cuda_private.cu.cc",
"gloo/cuda.cu",
"gloo/cuda_private.cu",
],
visibility = ["//visibility:public"],
deps = [
@ -72,8 +72,8 @@ cc_library(
"gloo/cuda*.cc",
"gloo/common/win.cc",
"gloo/rendezvous/redis_store.cc",
],
),
]
) + if_cuda(glob(["gloo/cuda*.cc"])),
copts = [
"-std=gnu++11",
"-std=c++11",


@ -58,7 +58,7 @@ def header_template_rule_impl(ctx):
CcInfo(compilation_context = cc_common.create_compilation_context(
# pass out the include path for finding this header
includes = depset([ctx.outputs.out.dirname, ctx.bin_dir.path]),
system_includes = depset([ctx.attr.include, ctx.outputs.out.dirname, ctx.bin_dir.path]),
# and the actual header here.
headers = depset([ctx.outputs.out]),
@ -68,6 +68,7 @@ def header_template_rule_impl(ctx):
header_template_rule = rule(
attrs = {
"out": attr.output(mandatory = True),
"include": attr.string(),
"src": attr.label(
mandatory = True,
allow_single_file = True,


@ -1,5 +0,0 @@
# Config for CUDA
This is a checked-in copy of the auto-generated config for building CUDA code with bazel. The content of this folder was generated from https://github.com/tensorflow/tensorflow `./configure` execution and then edited manually to fit the pytorch needs.
The LICENSE for the TensorFlow project is APACHE 2. The full LICENSE file could be found here https://github.com/tensorflow/tensorflow/blob/master/LICENSE.


@ -1 +0,0 @@
workspace(name = "local_config_cuda")


@ -1,451 +0,0 @@
licenses([
"restricted",
"reciprocal",
"notice",
]) # MPL2, portions GPL v3, LGPL v3, BSD-like
package(default_visibility = ["//visibility:public"])
config_setting(
name = "using_nvcc",
values = {
"define": "using_cuda_nvcc=true",
},
)
config_setting(
name = "using_clang",
values = {
"define": "using_cuda_clang=true",
},
)
# Equivalent to using_clang && -c opt.
config_setting(
name = "using_clang_opt",
values = {
"define": "using_cuda_clang=true",
"compilation_mode": "opt",
},
)
config_setting(
name = "darwin",
values = {"cpu": "darwin"},
)
cc_library(
name = "cuda_headers",
hdrs = [
":cuda-include",
":cudnn-include",
],
includes = [
".",
"include",
],
)
cc_library(
name = "cudnn_headers",
hdrs = [
":cudnn-include",
],
includes = [
".",
"include",
],
)
cc_library(
name = "cudart_static",
linkopts = [
"-L/usr/local/cuda/lib64",
],
)
cc_library(
name = "cuda_driver",
linkopts = ["-lcuda"],
deps = [":linker_search_path"],
)
# Provides the RPATH for Nvidia-less sytems to be able to run binaries linked to libcuda.
cc_library(
name = "driver_stub_runtime",
linkopts = [
"-Wl,-rpath,/usr/local/cuda/lib64/stubs",
],
deps = [":cuda_driver"],
)
cc_library(
name = "linker_search_path",
linkopts = [
"-L/usr/local/cuda/lib64",
"-L/usr/local/cuda/lib64/stubs",
"-Wl,-rpath-link,/usr/local/cuda/lib64",
"-Wl,-rpath-link,/usr/local/cuda/lib64/stubs",
],
)
[
cc_library(
name = libname,
linkopts = ["-l" + libname] + (["-lgomp"] if (libname == "cusolver") else []),
linkstatic = True,
deps = [":linker_search_path"],
)
for libname in [
"cublas",
"cudart",
"cudnn",
"cufft",
"curand",
"cusolver",
"cusparse",
"nvrtc",
"nvToolsExt",
]
]
cc_library(
name = "cuda",
deps = [
":cublas",
":cuda_headers",
":cudart",
":cudnn",
":cufft",
":curand",
":nvToolsExt",
],
)
# NVIDIA Performance Primitives (http://docs.nvidia.com/cuda/npp/modules.html))
# used by OpenCV
cc_library(
name = "nppi",
linkopts = [
"-lnppc",
"-lnppial",
"-lnppicom",
"-lnppidei",
"-lnppif",
"-lnppig",
"-lnppim",
"-lnppist",
"-lnppitc",
"-lnpps",
],
linkstatic = True,
deps = [":linker_search_path"],
)
# NVIDIA Management Library
cc_library(
name = "nvml",
linkopts = [
"-lnvidia-ml",
"-Wl,-rpath,/usr/lib/nvidia-410",
"-Wl,-rpath,/usr/lib/nvidia-390",
"-Wl,-rpath,/usr/lib/nvidia-387",
"-Wl,-rpath,/usr/lib/nvidia-384",
],
deps = [":linker_search_path"],
)
cc_library(
name = "cupti_headers",
hdrs = [
":cuda-extras",
],
includes = [
".",
"extras/CUPTI/include/",
],
)
# cupti .so exposed at linktime
cc_library(
name = "cupti_link",
linkopts = [
"-L/usr/local/cuda/extras/CUPTI/lib64",
"-lcupti",
],
)
cc_library(
name = "libdevice_root",
data = [":cuda-nvvm"],
)
CUDA_INCLUDES_FILES = [
"include/builtin_types.h",
"include/channel_descriptor.h",
"include/CL/cl_egl.h",
"include/CL/cl_ext.h",
"include/CL/cl_gl_ext.h",
"include/CL/cl_gl.h",
"include/CL/cl.h",
"include/CL/cl.hpp",
"include/CL/cl_platform.h",
"include/CL/opencl.h",
"include/common_functions.h",
"include/cooperative_groups.h",
"include/cooperative_groups_helpers.h",
"include/crt/common_functions.h",
"include/crt/device_double_functions.h",
"include/crt/device_double_functions.hpp",
"include/crt/device_functions.h",
"include/crt/device_functions.hpp",
"include/crt/func_macro.h",
"include/crt/host_config.h",
"include/crt/host_defines.h",
"include/crt/host_runtime.h",
"include/crt/math_functions.h",
"include/crt/math_functions.hpp",
"include/crt/mma.h",
"include/crt/mma.hpp",
"include/crt/nvfunctional",
"include/crt/sm_70_rt.h",
"include/crt/sm_70_rt.hpp",
"include/crt/storage_class.h",
# TODO: figure out why on a CI machine with CUDA 10.2 it's not present
# "include/cublas_api.h",
# "include/cublas.h",
# "include/cublas_v2.h",
# "include/cublasXt.h",
"include/cuComplex.h",
"include/cuda_device_runtime_api.h",
"include/cudaEGL.h",
"include/cuda_egl_interop.h",
"include/cuda_fp16.h",
"include/cuda_fp16.hpp",
"include/cudaGL.h",
"include/cuda_gl_interop.h",
"include/cuda.h",
"include/cudalibxt.h",
"include/cuda_occupancy.h",
"include/cuda_profiler_api.h",
"include/cudaProfiler.h",
"include/cudart_platform.h",
"include/cuda_runtime_api.h",
"include/cuda_runtime.h",
"include/cuda_surface_types.h",
"include/cuda_texture_types.h",
"include/cudaVDPAU.h",
"include/cuda_vdpau_interop.h",
"include/cufft.h",
"include/cufftw.h",
"include/cufftXt.h",
"include/curand_discrete2.h",
"include/curand_discrete.h",
"include/curand_globals.h",
"include/curand.h",
"include/curand_kernel.h",
"include/curand_lognormal.h",
"include/curand_mrg32k3a.h",
"include/curand_mtgp32dc_p_11213.h",
"include/curand_mtgp32.h",
"include/curand_mtgp32_host.h",
"include/curand_mtgp32_kernel.h",
"include/curand_normal.h",
"include/curand_normal_static.h",
"include/curand_philox4x32_x.h",
"include/curand_poisson.h",
"include/curand_precalc.h",
"include/curand_uniform.h",
"include/cusolver_common.h",
"include/cusolverDn.h",
"include/cusolverRf.h",
"include/cusolverSp.h",
"include/cusolverSp_LOWLEVEL_PREVIEW.h",
"include/cusparse.h",
"include/cusparse_v2.h",
"include/device_atomic_functions.h",
"include/device_atomic_functions.hpp",
"include/device_double_functions.h",
"include/device_functions.h",
"include/device_launch_parameters.h",
"include/device_types.h",
"include/driver_functions.h",
"include/driver_types.h",
"include/fatBinaryCtl.h",
"include/fatbinary.h",
"include/host_config.h",
"include/host_defines.h",
"include/library_types.h",
"include/math_constants.h",
"include/math_functions.h",
"include/mma.h",
"include/nppcore.h",
"include/nppdefs.h",
"include/npp.h",
"include/nppi_arithmetic_and_logical_operations.h",
"include/nppi_color_conversion.h",
"include/nppi_compression_functions.h",
"include/nppi_computer_vision.h",
"include/nppi_data_exchange_and_initialization.h",
"include/nppi_filtering_functions.h",
"include/nppi_geometry_transforms.h",
"include/nppi.h",
"include/nppi_linear_transforms.h",
"include/nppi_morphological_operations.h",
"include/nppi_statistics_functions.h",
"include/nppi_support_functions.h",
"include/nppi_threshold_and_compare_operations.h",
"include/npps_arithmetic_and_logical_operations.h",
"include/npps_conversion_functions.h",
"include/npps_filtering_functions.h",
"include/npps.h",
"include/npps_initialization.h",
"include/npps_statistics_functions.h",
"include/npps_support_functions.h",
# Note: CUDA 10.0 only
# "include/nppversion.h",
# TODO: figure out why on a CI machine with CUDA 10.2 it's not present
# "include/nvblas.h",
"include/nvfunctional",
"include/nvgraph.h",
"include/nvjpeg.h",
"include/nvml.h",
"include/nvrtc.h",
"include/nvToolsExtCuda.h",
"include/nvToolsExtCudaRt.h",
"include/nvToolsExt.h",
"include/nvToolsExtMeta.h",
"include/nvToolsExtSync.h",
"include/nvtx3/nvToolsExtCuda.h",
"include/nvtx3/nvToolsExtCudaRt.h",
"include/nvtx3/nvToolsExt.h",
"include/nvtx3/nvToolsExtOpenCL.h",
"include/nvtx3/nvToolsExtSync.h",
"include/nvtx3/nvtxDetail/nvtxImplCore.h",
"include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h",
"include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h",
"include/nvtx3/nvtxDetail/nvtxImpl.h",
"include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h",
"include/nvtx3/nvtxDetail/nvtxImplSync_v3.h",
"include/nvtx3/nvtxDetail/nvtxInitDecls.h",
"include/nvtx3/nvtxDetail/nvtxInitDefs.h",
"include/nvtx3/nvtxDetail/nvtxInit.h",
"include/nvtx3/nvtxDetail/nvtxLinkOnce.h",
"include/nvtx3/nvtxDetail/nvtxTypes.h",
"include/sm_20_atomic_functions.h",
"include/sm_20_atomic_functions.hpp",
"include/sm_20_intrinsics.h",
"include/sm_20_intrinsics.hpp",
"include/sm_30_intrinsics.h",
"include/sm_30_intrinsics.hpp",
"include/sm_32_atomic_functions.h",
"include/sm_32_atomic_functions.hpp",
"include/sm_32_intrinsics.h",
"include/sm_32_intrinsics.hpp",
"include/sm_35_atomic_functions.h",
"include/sm_35_intrinsics.h",
"include/sm_60_atomic_functions.h",
"include/sm_60_atomic_functions.hpp",
"include/sm_61_intrinsics.h",
"include/sm_61_intrinsics.hpp",
# CUDA 10.0 only
# "include/sobol_direction_vectors.h",
"include/surface_functions.h",
"include/surface_functions.hpp",
"include/surface_indirect_functions.h",
"include/surface_indirect_functions.hpp",
"include/surface_types.h",
"include/texture_fetch_functions.h",
"include/texture_fetch_functions.hpp",
"include/texture_indirect_functions.h",
"include/texture_indirect_functions.hpp",
"include/texture_types.h",
"include/vector_functions.h",
"include/vector_functions.hpp",
"include/vector_types.h",
]
genrule(
name = "cuda-include",
outs = CUDA_INCLUDES_FILES,
cmd = " && ".join([
"ln -s /usr/local/cuda/{relpath} $(@D)/{relpath}".format(relpath = p)
for p in CUDA_INCLUDES_FILES
]),
local = True,
tags = ["no-cache"],
)
CUDA_NVVM_FILES = [
"nvvm/bin/cicc",
"nvvm/include/nvvm.h",
"nvvm/lib64/libnvvm.so",
"nvvm/lib64/libnvvm.so.3",
"nvvm/lib64/libnvvm.so.3.3.0",
"nvvm/libdevice/libdevice.10.bc",
]
genrule(
name = "cuda-nvvm",
outs = CUDA_NVVM_FILES,
cmd = " && ".join([
"ln -s /usr/local/cuda/{relpath} $(@D)/{relpath}".format(relpath = p)
for p in CUDA_NVVM_FILES
]),
local = True,
tags = ["no-cache"],
)
CUDA_EXTRAS_FILES = [
"extras/CUPTI/include/cuda_stdint.h",
"extras/CUPTI/include/cupti.h",
"extras/CUPTI/include/cupti_activity.h",
"extras/CUPTI/include/cupti_callbacks.h",
"extras/CUPTI/include/cupti_driver_cbid.h",
"extras/CUPTI/include/cupti_events.h",
"extras/CUPTI/include/cupti_metrics.h",
"extras/CUPTI/include/cupti_nvtx_cbid.h",
"extras/CUPTI/include/cupti_result.h",
"extras/CUPTI/include/cupti_runtime_cbid.h",
"extras/CUPTI/include/cupti_version.h",
"extras/CUPTI/include/generated_cuda_gl_interop_meta.h",
"extras/CUPTI/include/generated_cuda_meta.h",
"extras/CUPTI/include/generated_cuda_runtime_api_meta.h",
"extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h",
"extras/CUPTI/include/generated_cudaGL_meta.h",
"extras/CUPTI/include/generated_cudaVDPAU_meta.h",
"extras/CUPTI/include/generated_nvtx_meta.h",
"extras/CUPTI/include/GL/gl.h",
"extras/CUPTI/include/GL/glew.h",
"extras/CUPTI/include/GL/glext.h",
"extras/CUPTI/include/GL/glu.h",
"extras/CUPTI/include/GL/glut.h",
"extras/CUPTI/include/GL/glx.h",
"extras/CUPTI/include/GL/glxext.h",
"extras/CUPTI/include/GL/wglew.h",
"extras/CUPTI/include/GL/wglext.h",
"extras/CUPTI/include/openacc/cupti_openacc.h",
]
genrule(
name = "cuda-extras",
outs = CUDA_EXTRAS_FILES,
cmd = " && ".join([
"ln -s /usr/local/cuda/{relpath} $(@D)/{relpath}".format(relpath = p)
for p in CUDA_EXTRAS_FILES
]),
local = True,
tags = ["no-cache"],
)
genrule(
name = "cudnn-include",
outs = [
"include/cudnn.h",
],
cmd = """
ln -s /usr/include/cudnn.h $(@D)/cudnn.h""",
local = True,
tags = ["no-cache"],
)


@ -162,8 +162,8 @@ cc_library(
cc_library(
name = "tensorpipe_cuda",
srcs = TENSORPIPE_CUDA_SOURCES,
hdrs = TENSORPIPE_CUDA_HEADERS + [":tensorpipe_cuda_config_header"],
srcs = glob(TENSORPIPE_CUDA_SOURCES),
hdrs = glob(TENSORPIPE_CUDA_HEADERS) + [":tensorpipe_cuda_config_header"],
includes = [
".",
],


@ -1,3 +1,6 @@
# gpu support is not available
def cu_library(**kwargs):
pass
load("@rules_cuda//cuda:defs.bzl", "cuda_library")
NVCC_COPTS = ["--expt-relaxed-constexpr", "--expt-extended-lambda"]
def cu_library(name, srcs, copts = [], **kwargs):
cuda_library(name, srcs = srcs, copts = NVCC_COPTS + copts, **kwargs)
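
As a usage note (a sketch, not part of the diff): the wrapper only prepends the two nvcc flags and forwards everything else, so a hypothetical call such as

    cu_library(
        name = "example_cuda",   # hypothetical target
        srcs = ["example.cu"],
        copts = ["-O2"],
    )

invokes `cuda_library` with `copts = ["--expt-relaxed-constexpr", "--expt-extended-lambda", "-O2"]`, passing `deps`, `visibility`, and any other keyword arguments through unchanged. The callers visible in this diff are `cu_library` targets such as `:aten_cuda` and `:torch_distributed_cuda` in `BUILD.bazel` and `gloo_cuda` in the gloo build file.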