diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 00000000000..bb94f9a93f6 --- /dev/null +++ b/.bazelrc @@ -0,0 +1,3 @@ +build --copt=--std=c++14 +build --copt=-I. +build --copt=-isystem --copt bazel-out/k8-fastbuild/bin diff --git a/.bazelversion b/.bazelversion new file mode 100644 index 00000000000..ccbccc3dc62 --- /dev/null +++ b/.bazelversion @@ -0,0 +1 @@ +2.2.0 diff --git a/.circleci/cimodel/data/pytorch_build_definitions.py b/.circleci/cimodel/data/pytorch_build_definitions.py index 2d90341f5c7..b042d8d8231 100644 --- a/.circleci/cimodel/data/pytorch_build_definitions.py +++ b/.circleci/cimodel/data/pytorch_build_definitions.py @@ -187,9 +187,9 @@ def instantiate_configs(): root = get_root() found_configs = conf_tree.dfs(root) - restrict_phases = None for fc in found_configs: + restrict_phases = None distro_name = fc.find_prop("distro_name") compiler_name = fc.find_prop("compiler_name") compiler_version = fc.find_prop("compiler_version") diff --git a/.circleci/config.yml b/.circleci/config.yml index c9aeb2efd60..be2a2d1f904 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1557,6 +1557,77 @@ jobs: cd ${PROJ_ROOT}/ios/TestApp instruments -s -devices fastlane scan + pytorch_linux_bazel_build: + <<: *pytorch_params + machine: + image: ubuntu-1604:201903-01 + steps: + # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml + - attach_scripts + - setup_linux_system_environment + - checkout + - setup_ci_environment + - run: + name: Bazel Build + no_output_timeout: "1h" + command: | + set -e + # Pull Docker image and run build + echo "DOCKER_IMAGE: "${DOCKER_IMAGE} + time docker pull ${DOCKER_IMAGE} >/dev/null + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}) + + echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT" + + git submodule sync && git submodule update -q 
--init --recursive + + docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace + + export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/build.sh") | docker exec -u jenkins -i "$id" bash) 2>&1' + + echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts + + # Push intermediate Docker image for next phase to use + if [ -z "${BUILD_ONLY}" ]; then + # Augment our output image name with bazel to avoid collisions + output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1} + export COMMIT_DOCKER_IMAGE=$output_image + docker commit "$id" ${COMMIT_DOCKER_IMAGE} + time docker push ${COMMIT_DOCKER_IMAGE} + fi + + pytorch_linux_bazel_test: + <<: *pytorch_params + machine: + image: ubuntu-1604:201903-01 + steps: + # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml + - attach_scripts + - setup_linux_system_environment + - setup_ci_environment + - run: + name: Test + no_output_timeout: "90m" + command: | + set -e + output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1} + export COMMIT_DOCKER_IMAGE=$output_image + echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE} + + time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null + + if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) + else + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) + fi + + if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then + export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1' + else + export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo 
"export CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1' + fi + echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts promote_s3: <<: *promote_common @@ -2438,6 +2509,20 @@ workflows: docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7:e43973a9-9d5a-4138-9181-a08a0fc55e2f" use_cuda_docker_runtime: "1" resource_class: gpu.medium + - pytorch_linux_bazel_build: + name: pytorch_bazel_build + requires: + - setup + build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-build" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026" + resource_class: large + - pytorch_linux_bazel_test: + name: pytorch_bazel_test + requires: + - setup + - pytorch_bazel_build + build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-test" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026" - caffe2_linux_build: name: caffe2_onnx_main_py3_6_clang7_ubuntu16_04_build requires: diff --git a/.circleci/generate_config_yml.py b/.circleci/generate_config_yml.py index d0cd80f2284..a6f2d4b0f80 100755 --- a/.circleci/generate_config_yml.py +++ b/.circleci/generate_config_yml.py @@ -101,6 +101,7 @@ YAML_SOURCES = [ File("workflows-pytorch-ios-builds.yml"), File("workflows-pytorch-mobile-builds.yml"), File("workflows-pytorch-ge-config-tests.yml"), + File("workflows-pytorch-bazel-builds.yml"), Listgen(caffe2_build_definitions.get_workflow_jobs, 3), File("workflows-binary-builds-smoke-subset.yml"), Listgen(binary_build_definitions.get_binary_smoke_test_jobs, 3), diff --git a/.circleci/verbatim-sources/job-specs-custom.yml b/.circleci/verbatim-sources/job-specs-custom.yml index 2e167b3326e..dfda514bcea 
100644 --- a/.circleci/verbatim-sources/job-specs-custom.yml +++ b/.circleci/verbatim-sources/job-specs-custom.yml @@ -440,3 +440,74 @@ cd ${PROJ_ROOT}/ios/TestApp instruments -s -devices fastlane scan + pytorch_linux_bazel_build: + <<: *pytorch_params + machine: + image: ubuntu-1604:201903-01 + steps: + # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml + - attach_scripts + - setup_linux_system_environment + - checkout + - setup_ci_environment + - run: + name: Bazel Build + no_output_timeout: "1h" + command: | + set -e + # Pull Docker image and run build + echo "DOCKER_IMAGE: "${DOCKER_IMAGE} + time docker pull ${DOCKER_IMAGE} >/dev/null + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}) + + echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT" + + git submodule sync && git submodule update -q --init --recursive + + docker cp /home/circleci/project/. 
$id:/var/lib/jenkins/workspace + + export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/build.sh") | docker exec -u jenkins -i "$id" bash) 2>&1' + + echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts + + # Push intermediate Docker image for next phase to use + if [ -z "${BUILD_ONLY}" ]; then + # Augment our output image name with bazel to avoid collisions + output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1} + export COMMIT_DOCKER_IMAGE=$output_image + docker commit "$id" ${COMMIT_DOCKER_IMAGE} + time docker push ${COMMIT_DOCKER_IMAGE} + fi + + pytorch_linux_bazel_test: + <<: *pytorch_params + machine: + image: ubuntu-1604:201903-01 + steps: + # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml + - attach_scripts + - setup_linux_system_environment + - setup_ci_environment + - run: + name: Test + no_output_timeout: "90m" + command: | + set -e + output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1} + export COMMIT_DOCKER_IMAGE=$output_image + echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE} + + time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null + + if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) + else + export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE}) + fi + + if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then + export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1' + else + export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}" && echo 
"source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1' + fi + echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts diff --git a/.circleci/verbatim-sources/workflows-pytorch-bazel-builds.yml b/.circleci/verbatim-sources/workflows-pytorch-bazel-builds.yml new file mode 100644 index 00000000000..825ea500ad1 --- /dev/null +++ b/.circleci/verbatim-sources/workflows-pytorch-bazel-builds.yml @@ -0,0 +1,14 @@ + - pytorch_linux_bazel_build: + name: pytorch_bazel_build + requires: + - setup + build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-build" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026" + resource_class: large + - pytorch_linux_bazel_test: + name: pytorch_bazel_test + requires: + - setup + - pytorch_bazel_build + build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-test" + docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026" diff --git a/.gitignore b/.gitignore index c27e2b80e1e..380ff113422 100644 --- a/.gitignore +++ b/.gitignore @@ -255,3 +255,6 @@ TAGS # clangd background index .clangd/ + +# bazel symlinks +bazel-* diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh index 91dd0e16eac..1de48d92a4d 100755 --- a/.jenkins/pytorch/build.sh +++ b/.jenkins/pytorch/build.sh @@ -180,65 +180,72 @@ if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then export CXX=clang++ fi +if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then + set -e -# check that setup.py would fail with bad arguments -echo "The next three invocations are expected to fail with invalid command error messages." -( ! get_exit_code python setup.py bad_argument ) -( ! get_exit_code python setup.py clean] ) -( ! 
get_exit_code python setup.py clean bad_argument ) + get_bazel -if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then - - # ppc64le build fails when WERROR=1 - # set only when building other architectures - # only use for "python setup.py install" line - if [[ "$BUILD_ENVIRONMENT" != *ppc64le* && "$BUILD_ENVIRONMENT" != *clang* ]]; then - WERROR=1 python setup.py install - else - python setup.py install - fi - - # TODO: I'm not sure why, but somehow we lose verbose commands - set -x - - if which sccache > /dev/null; then - echo 'PyTorch Build Statistics' - sccache --show-stats - fi - - assert_git_not_dirty - - # Build custom operator tests. - CUSTOM_OP_BUILD="$PWD/../custom-op-build" - CUSTOM_OP_TEST="$PWD/test/custom_operator" - python --version - SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" - mkdir "$CUSTOM_OP_BUILD" - pushd "$CUSTOM_OP_BUILD" - cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" - make VERBOSE=1 - popd - assert_git_not_dirty + tools/bazel build :torch else - # Test standalone c10 build - if [[ "$BUILD_ENVIRONMENT" == *xenial-cuda10.1-cudnn7-py3* ]]; then - mkdir -p c10/build - pushd c10/build - cmake .. - make -j + # check that setup.py would fail with bad arguments + echo "The next three invocations are expected to fail with invalid command error messages." + ( ! get_exit_code python setup.py bad_argument ) + ( ! get_exit_code python setup.py clean] ) + ( ! 
get_exit_code python setup.py clean bad_argument ) + + if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then + + # ppc64le build fails when WERROR=1 + # set only when building other architectures + # only use for "python setup.py install" line + if [[ "$BUILD_ENVIRONMENT" != *ppc64le* && "$BUILD_ENVIRONMENT" != *clang* ]]; then + WERROR=1 python setup.py install + else + python setup.py install + fi + + # TODO: I'm not sure why, but somehow we lose verbose commands + set -x + + if which sccache > /dev/null; then + echo 'PyTorch Build Statistics' + sccache --show-stats + fi + + assert_git_not_dirty + + # Build custom operator tests. + CUSTOM_OP_BUILD="$PWD/../custom-op-build" + CUSTOM_OP_TEST="$PWD/test/custom_operator" + python --version + SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" + mkdir "$CUSTOM_OP_BUILD" + pushd "$CUSTOM_OP_BUILD" + cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" + make VERBOSE=1 popd assert_git_not_dirty - fi + else + # Test standalone c10 build + if [[ "$BUILD_ENVIRONMENT" == *xenial-cuda10.1-cudnn7-py3* ]]; then + mkdir -p c10/build + pushd c10/build + cmake .. + make -j + popd + assert_git_not_dirty + fi - # Test no-Python build - echo "Building libtorch" - # NB: Install outside of source directory (at the same level as the root - # pytorch folder) so that it doesn't get cleaned away prior to docker push. - BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py - mkdir -p ../cpp-build/caffe2 - pushd ../cpp-build/caffe2 - WERROR=1 VERBOSE=1 DEBUG=1 python $BUILD_LIBTORCH_PY - popd + # Test no-Python build + echo "Building libtorch" + # NB: Install outside of source directory (at the same level as the root + # pytorch folder) so that it doesn't get cleaned away prior to docker push. 
+ BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py + mkdir -p ../cpp-build/caffe2 + pushd ../cpp-build/caffe2 + WERROR=1 VERBOSE=1 DEBUG=1 python $BUILD_LIBTORCH_PY + popd + fi fi # Test XLA build diff --git a/.jenkins/pytorch/common.sh b/.jenkins/pytorch/common.sh index 60cc647ae68..38519c163ac 100644 --- a/.jenkins/pytorch/common.sh +++ b/.jenkins/pytorch/common.sh @@ -187,3 +187,12 @@ function file_diff_from_base() { set -e git diff --name-only "$(git merge-base origin master HEAD)" > "$1" } + +function get_bazel() { + # download bazel version + wget https://github.com/bazelbuild/bazel/releases/download/2.2.0/bazel-2.2.0-linux-x86_64 -O tools/bazel + # verify content + echo 'b2f002ea0e6194a181af6ac84cd94bd8dc797722eb2354690bebac92dda233ff tools/bazel' | sha256sum --quiet -c + + chmod +x tools/bazel +} diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh index 9119880e510..145041e006c 100755 --- a/.jenkins/pytorch/test.sh +++ b/.jenkins/pytorch/test.sh @@ -42,7 +42,7 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then fi # --user breaks ppc64le builds and these packages are already in ppc64le docker -if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]]; then +if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]] && [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then # JIT C++ extensions require ninja. pip_install --user ninja # ninja is installed in /var/lib/jenkins/.local/bin @@ -252,7 +252,15 @@ test_backward_compatibility() { assert_git_not_dirty } -if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then +test_bazel() { + set -e + + get_bazel + + tools/bazel test --test_tag_filters=-gpu-required --test_filter=-*_CUDA :all_tests +} + +if ! 
[[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then (cd test && python -c "import torch; print(torch.__config__.show())") (cd test && python -c "import torch; print(torch.__config__.parallel_info())") fi @@ -278,6 +286,8 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-test2 || "${JOB_BASE_NAME}" == *-test2 ]]; t test_aten test_libtorch test_custom_script_ops +elif [[ "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then + test_bazel else test_torchvision test_python_nn diff --git a/BUILD.bazel b/BUILD.bazel new file mode 100644 index 00000000000..7fa034b27d9 --- /dev/null +++ b/BUILD.bazel @@ -0,0 +1,2499 @@ +load("@rules_proto//proto:defs.bzl", "proto_library") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_proto_library", "cc_test") +load("//third_party:substitution.bzl", "template_rule") +load("//tools/rules:cu.bzl", "cu_library") +load("//tools/config:defs.bzl", "if_cuda") +load("//:aten.bzl", "intern_build_aten_ops") + +COMMON_COPTS = [ + "-DHAVE_MALLOC_USABLE_SIZE=1", + "-DHAVE_MMAP=1", + "-DHAVE_SHM_OPEN=1", + "-DHAVE_SHM_UNLINK=1", + "-D_FILE_OFFSET_BITS=64", + "-DHAVE_GCC_GET_CPUID", + "-DUSE_GCC_GET_CPUID", + "-DTH_HAVE_THREAD", + "-DUSE_FBGEMM", + "-DUSE_DISTRIBUTED", + "-DAT_PARALLEL_NATIVE=1", + "-DATEN_THREADING=NATIVE", + "-DNO_CUDNN_DESTROY_HANDLE", +] + if_cuda([ + "-DUSE_CUDA", + "-DUSE_CUDNN", +]) + +# c10 +template_rule( + name = "cmake_macros_h", + src = "c10/macros/cmake_macros.h.in", + out = "c10/macros/cmake_macros.h", + substitutions = { + "cmakedefine": "define", + "#define FEATURE_TORCH_MOBILE": "/* #undef FEATURE_TORCH_MOBILE */", + "#define USE_STATIC_DISPATCH": "/* #undef USE_STATIC_DISPATCH */", + "#define C10_USE_NUMA": "/* #undef C10_USE_NUMA */", + }, +) + +template_rule( + name = "cuda_cmake_macros_h", + src = "c10/cuda/impl/cuda_cmake_macros.h.in", + out = "c10/cuda/impl/cuda_cmake_macros.h", + substitutions = { + "cmakedefine": "define", + }, +) + +cc_library( + name = "c10_headers", + hdrs = 
glob([ + "c10/core/*.h", + "c10/core/impl/*.h", + "c10/cuda/*.h", + "c10/cuda/impl/*.h", + "c10/macros/*.h", + "c10/util/*.h", + ]) + [ + "c10/macros/cmake_macros.h", + "c10/cuda/impl/cuda_cmake_macros.h", + ], + deps = [ + "@com_github_gflags_gflags//:gflags", + "@com_github_glog//:glog", + ], +) + +cc_library( + name = "c10", + srcs = glob([ + "c10/core/*.cpp", + "c10/core/impl/*.cpp", + "c10/util/*.cpp", + ]) + if_cuda( + glob([ + "c10/cuda/*.cpp", + "c10/cuda/impl/*.cpp", + ]), + [], + ), + copts = ["-DCAFFE2_BUILD_MAIN_LIB"], + deps = [ + ":c10_headers", + ] + if_cuda( + ["@cuda"], + [], + ), + alwayslink = True, +) + +cc_test( + name = "c10_tests", + size = "small", + srcs = glob([ + "c10/test/util/*.cpp", + "c10/test/util/*.h", + "c10/test/core/*.cpp", + "c10/test/core/impl/*.cpp", + ]), + copts = ["-Wno-deprecated-declarations"], + deps = [ + ":c10", + ":c10_headers", + "@com_google_googletest//:gtest_main", + ], +) + +py_binary( + name = "gen", + srcs = ["aten/src/ATen/gen.py"], +) + +genrule( + name = "generated_cpp", + srcs = [ + "aten/src/ATen/Declarations.cwrap", + "aten/src/THCUNN/generic/THCUNN.h", + "aten/src/ATen/nn.yaml", + "aten/src/ATen/native/native_functions.yaml", + ] + glob(["aten/src/ATen/templates/**"]), + outs = [ + "aten/src/ATen/Declarations.yaml", + "aten/src/ATen/CPUType.h", + "aten/src/ATen/CPUType.cpp", + "aten/src/ATen/Functions.h", + "aten/src/ATen/LegacyTHFunctionsCPU.h", + "aten/src/ATen/LegacyTHFunctionsCPU.cpp", + "aten/src/ATen/NativeFunctions.h", + "aten/src/ATen/MkldnnCPUType.h", + "aten/src/ATen/MkldnnCPUType.cpp", + "aten/src/ATen/QuantizedCPUType.h", + "aten/src/ATen/QuantizedCPUType.cpp", + "aten/src/ATen/SparseCPUType.h", + "aten/src/ATen/SparseCPUType.cpp", + "aten/src/ATen/TypeDefault.h", + "aten/src/ATen/TypeDefault.cpp", + "aten/src/ATen/core/TensorBody.h", + "aten/src/ATen/core/TensorMethods.h", + "aten/src/ATen/core/OpsAlreadyMovedToC10.cpp", + ], + cmd = "$(location :gen) --source-path aten/src/ATen 
--install_dir `dirname $(location aten/src/ATen/Declarations.yaml)` aten/src/ATen/Declarations.cwrap aten/src/THCUNN/generic/THCUNN.h aten/src/ATen/nn.yaml aten/src/ATen/native/native_functions.yaml", + tools = [":gen"], +) + +py_library( + name = "code_template", + srcs = ["aten/src/ATen/code_template.py"], + imports = ["aten"], +) + +py_library( + name = "tools_autograd", + srcs = glob(["tools/autograd/*.py"]), + data = glob([ + "tools/autograd/*.yaml", + "tools/autograd/templates/*", + ]), + deps = [":code_template"], +) + +py_library( + name = "tools_jit", + srcs = glob(["tools/jit/*.py"]), + data = glob(["tools/jit/templates/*"]), +) + +py_binary( + name = "generate_code", + srcs = ["tools/setup_helpers/generate_code.py"], + deps = [ + ":tools_autograd", + ":tools_jit", + ], +) + +genrule( + name = "generated_code", + srcs = [ + "aten/src/ATen/Declarations.yaml", + ], + outs = [ + "torch/csrc/autograd/generated/python_functions.h", + "torch/csrc/autograd/generated/python_functions.cpp", + "torch/csrc/autograd/generated/python_variable_methods.cpp", + "torch/csrc/autograd/generated/python_torch_functions.cpp", + "torch/csrc/autograd/generated/python_nn_functions.cpp", + "torch/csrc/autograd/generated/VariableType.h", + "torch/csrc/autograd/generated/VariableType_0.cpp", + "torch/csrc/autograd/generated/VariableType_1.cpp", + "torch/csrc/autograd/generated/VariableType_2.cpp", + "torch/csrc/autograd/generated/VariableType_3.cpp", + "torch/csrc/autograd/generated/VariableType_4.cpp", + # "torch/csrc/autograd/generated/VariableTypeEverything.cpp", + "torch/csrc/autograd/generated/RegistrationDeclarations.h", + "torch/csrc/autograd/generated/Functions.h", + "torch/csrc/autograd/generated/Functions.cpp", + "torch/csrc/autograd/generated/variable_factories.h", + "torch/csrc/jit/generated/register_aten_ops_0.cpp", + "torch/csrc/jit/generated/register_aten_ops_1.cpp", + "torch/csrc/jit/generated/register_aten_ops_2.cpp", + ], + cmd = "$(location :generate_code) 
--install_dir `dirname $(location torch/csrc/autograd/generated/variable_factories.h)`/../.. --declarations-path $(location aten/src/ATen/Declarations.yaml) --nn-path aten/src", + tools = [":generate_code"], +) + +exports_files( + srcs = ["aten/src/ATen/cpu/tbb/extra/version_string.ver.in"], +) + +# ATen +filegroup( + name = "aten_base_cpp", + srcs = glob([ + "aten/src/ATen/*.cpp", + "aten/src/ATen/detail/*.cpp", + "aten/src/ATen/cpu/*.cpp", + ]), +) + +filegroup( + name = "ATen_CORE_SRCS", + srcs = glob( + [ + "aten/src/ATen/core/**/*.cpp", + ], + exclude = [ + "aten/src/ATen/core/**/*_test.cpp", + ], + ), +) + +filegroup( + name = "aten_native_cpp", + srcs = glob(["aten/src/ATen/native/*.cpp"]), +) + +filegroup( + name = "aten_native_sparse_cpp", + srcs = glob(["aten/src/ATen/native/sparse/*.cpp"]), +) + +filegroup( + name = "aten_native_quantized_cpp", + srcs = glob( + [ + "aten/src/ATen/native/quantized/*.cpp", + "aten/src/ATen/native/quantized/cpu/*.cpp", + ], + ), +) + +filegroup( + name = "aten_native_mkl_cpp", + srcs = glob(["aten/src/ATen/native/mkl/*.cpp"]), +) + +filegroup( + name = "aten_native_mkldnn_cpp", + srcs = glob(["aten/src/ATen/native/mkldnn/*.cpp"]), +) + +filegroup( + name = "aten_native_xnnpack", + srcs = glob(["aten/src/ATen/native/xnnpack/*.cpp"]), +) + +filegroup( + name = "ATen_QUANTIZED_SRCS", + srcs = glob( + [ + "aten/src/ATen/quantized/**/*.cpp", + ], + exclude = [ + "aten/src/ATen/quantized/**/*_test.cpp", + ], + ), +) + +filegroup( + name = "th_srcs", + srcs = [ + "aten/src/TH/THAllocator.cpp", + "aten/src/TH/THBlas.cpp", + "aten/src/TH/THDiskFile.cpp", + "aten/src/TH/THFile.cpp", + "aten/src/TH/THGeneral.cpp", + "aten/src/TH/THLapack.cpp", + "aten/src/TH/THMemoryFile.cpp", + "aten/src/TH/THStorageFunctions.cpp", + "aten/src/TH/THTensor.cpp", + "aten/src/TH/THTensorEvenMoreMath.cpp", + "aten/src/TH/THTensorFill.cpp", + "aten/src/TH/THTensorLapack.cpp", + "aten/src/TH/THTensorMath.cpp", + "aten/src/TH/THTensorMoreMath.cpp", + 
"aten/src/TH/THTensorRandom.cpp", + "aten/src/TH/THVector.cpp", + "aten/src/TH/vector/AVX.cpp", + ], +) + +filegroup( + name = "aten_cuda_srcs", + srcs = [ + "aten/src/ATen/cuda/CUDABlas.cpp", + "aten/src/ATen/cuda/CUDAContext.cpp", + "aten/src/ATen/cuda/CUDAGenerator.cpp", + "aten/src/ATen/cuda/CuSparseHandlePool.cpp", + "aten/src/ATen/cuda/CublasHandlePool.cpp", + "aten/src/ATen/cuda/PinnedMemoryAllocator.cpp", + "aten/src/ATen/cuda/detail/CUDAHooks.cpp", + "aten/src/ATen/cudnn/Descriptors.cpp", + "aten/src/ATen/cudnn/Handle.cpp", + "aten/src/ATen/cudnn/Types.cpp", + "aten/src/ATen/native/cuda/CUDAUnaryOps.cpp", + "aten/src/ATen/native/cuda/LegacyDefinitions.cpp", + "aten/src/ATen/native/cuda/TensorShapeCUDA.cpp", + "aten/src/ATen/native/cudnn/AffineGridGenerator.cpp", + "aten/src/ATen/native/cudnn/BatchNorm.cpp", + "aten/src/ATen/native/cudnn/Conv.cpp", + "aten/src/ATen/native/cudnn/GridSampler.cpp", + "aten/src/ATen/native/cudnn/LossCTC.cpp", + "aten/src/ATen/native/cudnn/RNN.cpp", + "aten/src/ATen/native/miopen/BatchNorm_miopen.cpp", + "aten/src/ATen/native/miopen/Conv_miopen.cpp", + "aten/src/ATen/native/miopen/RNN_miopen.cpp", + "aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp", + "aten/src/THC/THCCachingHostAllocator.cpp", + "aten/src/THC/THCGeneral.cpp", + "aten/src/THC/THCStorageCopy.cpp", + "aten/src/THC/THCTensor.cpp", + ], +) + +filegroup( + name = "thc_srcs_cu", + srcs = [ + "aten/src/THC/THCBlas.cu.cc", + "aten/src/THC/THCReduceApplyUtils.cu.cc", + "aten/src/THC/THCSleep.cu.cc", + "aten/src/THC/THCSortUtils.cu.cc", + "aten/src/THC/THCStorage.cu.cc", + "aten/src/THC/THCStorageCopy.cu.cc", + "aten/src/THC/THCTensor.cu.cc", + "aten/src/THC/THCTensorCopy.cu.cc", + "aten/src/THC/THCTensorIndex.cu.cc", + "aten/src/THC/THCTensorMath.cu.cc", + "aten/src/THC/THCTensorMathBlas.cu.cc", + "aten/src/THC/THCTensorMathMagma.cu.cc", + "aten/src/THC/THCTensorMathPairwise.cu.cc", + "aten/src/THC/THCTensorMathReduce.cu.cc", + 
"aten/src/THC/THCTensorMathScan.cu.cc", + "aten/src/THC/THCTensorMode.cu.cc", + "aten/src/THC/THCTensorRandom.cu.cc", + "aten/src/THC/THCTensorScatterGather.cu.cc", + "aten/src/THC/THCTensorSort.cu.cc", + "aten/src/THC/THCTensorTopK.cu.cc", + "aten/src/THC/generated/THCTensorMaskedBFloat16.cu.cc", + "aten/src/THC/generated/THCTensorMaskedBool.cu.cc", + "aten/src/THC/generated/THCTensorMaskedByte.cu.cc", + "aten/src/THC/generated/THCTensorMaskedChar.cu.cc", + "aten/src/THC/generated/THCTensorMaskedDouble.cu.cc", + "aten/src/THC/generated/THCTensorMaskedFloat.cu.cc", + "aten/src/THC/generated/THCTensorMaskedHalf.cu.cc", + "aten/src/THC/generated/THCTensorMaskedInt.cu.cc", + "aten/src/THC/generated/THCTensorMaskedLong.cu.cc", + "aten/src/THC/generated/THCTensorMaskedShort.cu.cc", + "aten/src/THC/generated/THCTensorMathPointwiseBool.cu.cc", + "aten/src/THC/generated/THCTensorMathPointwiseByte.cu.cc", + "aten/src/THC/generated/THCTensorMathPointwiseChar.cu.cc", + "aten/src/THC/generated/THCTensorMathPointwiseDouble.cu.cc", + "aten/src/THC/generated/THCTensorMathPointwiseFloat.cu.cc", + "aten/src/THC/generated/THCTensorMathPointwiseHalf.cu.cc", + "aten/src/THC/generated/THCTensorMathPointwiseInt.cu.cc", + "aten/src/THC/generated/THCTensorMathPointwiseLong.cu.cc", + "aten/src/THC/generated/THCTensorMathPointwiseShort.cu.cc", + "aten/src/THC/generated/THCTensorMathReduceBFloat16.cu.cc", + "aten/src/THC/generated/THCTensorMathReduceBool.cu.cc", + "aten/src/THC/generated/THCTensorMathReduceByte.cu.cc", + "aten/src/THC/generated/THCTensorMathReduceChar.cu.cc", + "aten/src/THC/generated/THCTensorMathReduceDouble.cu.cc", + "aten/src/THC/generated/THCTensorMathReduceFloat.cu.cc", + "aten/src/THC/generated/THCTensorMathReduceHalf.cu.cc", + "aten/src/THC/generated/THCTensorMathReduceInt.cu.cc", + "aten/src/THC/generated/THCTensorMathReduceLong.cu.cc", + "aten/src/THC/generated/THCTensorMathReduceShort.cu.cc", + "aten/src/THC/generated/THCTensorSortByte.cu.cc", + 
"aten/src/THC/generated/THCTensorSortChar.cu.cc", + "aten/src/THC/generated/THCTensorSortDouble.cu.cc", + "aten/src/THC/generated/THCTensorSortFloat.cu.cc", + "aten/src/THC/generated/THCTensorSortHalf.cu.cc", + "aten/src/THC/generated/THCTensorSortInt.cu.cc", + "aten/src/THC/generated/THCTensorSortLong.cu.cc", + "aten/src/THC/generated/THCTensorSortShort.cu.cc", + ], +) + +filegroup( + name = "thcunn_srcs_cu", + srcs = [ + "aten/src/THCUNN/BCECriterion.cu.cc", + "aten/src/THCUNN/ClassNLLCriterion.cu.cc", + "aten/src/THCUNN/ELU.cu.cc", + "aten/src/THCUNN/GatedLinearUnit.cu.cc", + "aten/src/THCUNN/HardTanh.cu.cc", + "aten/src/THCUNN/LeakyReLU.cu.cc", + "aten/src/THCUNN/LogSigmoid.cu.cc", + "aten/src/THCUNN/MultiLabelMarginCriterion.cu.cc", + "aten/src/THCUNN/MultiMarginCriterion.cu.cc", + "aten/src/THCUNN/RReLU.cu.cc", + "aten/src/THCUNN/SoftMarginCriterion.cu.cc", + "aten/src/THCUNN/SoftPlus.cu.cc", + "aten/src/THCUNN/SoftShrink.cu.cc", + "aten/src/THCUNN/SpatialClassNLLCriterion.cu.cc", + "aten/src/THCUNN/SpatialConvolutionMM.cu.cc", + "aten/src/THCUNN/SpatialDepthwiseConvolution.cu.cc", + "aten/src/THCUNN/Tanh.cu.cc", + ], +) + +filegroup( + name = "aten_srcs_cu", + srcs = [ + "aten/src/ATen/cuda/detail/IndexUtils.cu.cc", + "aten/src/ATen/native/cuda/Activation.cu.cc", + "aten/src/ATen/native/cuda/AdaptiveAveragePooling.cu.cc", + "aten/src/ATen/native/cuda/AdaptiveAveragePooling3d.cu.cc", + "aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu.cc", + "aten/src/ATen/native/cuda/AdaptiveMaxPooling3d.cu.cc", + "aten/src/ATen/native/cuda/AveragePool2d.cu.cc", + "aten/src/ATen/native/cuda/AveragePool3d.cu.cc", + "aten/src/ATen/native/cuda/BatchLinearAlgebra.cu.cc", + "aten/src/ATen/native/cuda/BinaryArithmeticKernel.cu.cc", + "aten/src/ATen/native/cuda/BinaryCompareKernel.cu.cc", + "aten/src/ATen/native/cuda/BinaryMiscOpsKernels.cu.cc", + "aten/src/ATen/native/cuda/CUDAScalar.cu.cc", + "aten/src/ATen/native/cuda/Col2Im.cu.cc", + "aten/src/ATen/native/cuda/Copy.cu.cc", + 
"aten/src/ATen/native/cuda/CrossKernel.cu.cc", + "aten/src/ATen/native/cuda/DilatedMaxPool2d.cu.cc", + "aten/src/ATen/native/cuda/DilatedMaxPool3d.cu.cc", + "aten/src/ATen/native/cuda/DistanceKernel.cu.cc", + "aten/src/ATen/native/cuda/Distributions.cu.cc", + "aten/src/ATen/native/cuda/Dropout.cu.cc", + "aten/src/ATen/native/cuda/Embedding.cu.cc", + "aten/src/ATen/native/cuda/EmbeddingBackwardKernel.cu.cc", + "aten/src/ATen/native/cuda/EmbeddingBag.cu.cc", + "aten/src/ATen/native/cuda/FillKernel.cu.cc", + "aten/src/ATen/native/cuda/FractionalMaxPool2d.cu.cc", + "aten/src/ATen/native/cuda/FractionalMaxPool3d.cu.cc", + "aten/src/ATen/native/cuda/GridSampler.cu.cc", + "aten/src/ATen/native/cuda/Im2Col.cu.cc", + "aten/src/ATen/native/cuda/IndexKernel.cu.cc", + "aten/src/ATen/native/cuda/Indexing.cu.cc", + "aten/src/ATen/native/cuda/Lerp.cu.cc", + "aten/src/ATen/native/cuda/LinearAlgebra.cu.cc", + "aten/src/ATen/native/cuda/Loss.cu.cc", + "aten/src/ATen/native/cuda/LossCTC.cu.cc", + "aten/src/ATen/native/cuda/MaxUnpooling.cu.cc", + "aten/src/ATen/native/cuda/MultinomialKernel.cu.cc", + "aten/src/ATen/native/cuda/NaiveConvolutionTranspose2d.cu.cc", + "aten/src/ATen/native/cuda/NaiveConvolutionTranspose3d.cu.cc", + "aten/src/ATen/native/cuda/NaiveDilatedConvolution.cu.cc", + "aten/src/ATen/native/cuda/Normalization.cu.cc", + "aten/src/ATen/native/cuda/PointwiseOpsKernel.cu.cc", + "aten/src/ATen/native/cuda/PowKernel.cu.cc", + "aten/src/ATen/native/cuda/RNN.cu.cc", + "aten/src/ATen/native/cuda/RangeFactories.cu.cc", + "aten/src/ATen/native/cuda/Reduce.cu.cc", + "aten/src/ATen/native/cuda/ReduceOpsKernel.cu.cc", + "aten/src/ATen/native/cuda/ReflectionPad.cu.cc", + "aten/src/ATen/native/cuda/Repeat.cu.cc", + "aten/src/ATen/native/cuda/ReplicationPadding.cu.cc", + "aten/src/ATen/native/cuda/Resize.cu.cc", + "aten/src/ATen/native/cuda/SoftMax.cu.cc", + "aten/src/ATen/native/cuda/SortingKthValue.cu.cc", + "aten/src/ATen/native/cuda/SparseMM.cu.cc", + 
"aten/src/ATen/native/cuda/SpectralOps.cu.cc", + "aten/src/ATen/native/cuda/SummaryOps.cu.cc", + "aten/src/ATen/native/cuda/TensorCompare.cu.cc", + "aten/src/ATen/native/cuda/TensorFactories.cu.cc", + "aten/src/ATen/native/cuda/TensorTransformations.cu.cc", + "aten/src/ATen/native/cuda/TriangularOps.cu.cc", + "aten/src/ATen/native/cuda/UnaryOpsKernel.cu.cc", + "aten/src/ATen/native/cuda/Unique.cu.cc", + "aten/src/ATen/native/cuda/UpSampleBicubic2d.cu.cc", + "aten/src/ATen/native/cuda/UpSampleBilinear2d.cu.cc", + "aten/src/ATen/native/cuda/UpSampleLinear1d.cu.cc", + "aten/src/ATen/native/cuda/UpSampleNearest1d.cu.cc", + "aten/src/ATen/native/cuda/UpSampleNearest2d.cu.cc", + "aten/src/ATen/native/cuda/UpSampleNearest3d.cu.cc", + "aten/src/ATen/native/cuda/UpSampleTrilinear3d.cu.cc", + "aten/src/ATen/native/cuda/WeightNorm.cu.cc", + "aten/src/ATen/native/cuda/layer_norm_kernel.cu.cc", + "aten/src/ATen/native/quantized/cuda/fake_quantize_core.cu.cc", + "aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu.cc", + "aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu.cc", + "aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu.cc", + ], +) + +template_rule( + name = "aten_src_ATen_config", + src = "aten/src/ATen/Config.h.in", + out = "aten/src/ATen/Config.h", + substitutions = { + "@AT_MKLDNN_ENABLED@": "1", + "@AT_MKL_ENABLED@": "0", + "@AT_NNPACK_ENABLED@": "0", + "@CAFFE2_STATIC_LINK_CUDA_INT@": "0", + }, +) + +template_rule( + name = "aten_src_ATen_cuda_config", + src = "aten/src/ATen/cuda/CUDAConfig.h.in", + out = "aten/src/ATen/cuda/CUDAConfig.h", + substitutions = { + "@AT_CUDNN_ENABLED@": "1", + "@AT_ROCM_ENABLED@": "0", + "@NVCC_FLAGS_EXTRA@": "", + }, +) + +template_rule( + name = "aten_src_TH_THGeneral", + src = "aten/src/TH/THGeneral.h.in", + out = "aten/src/TH/THGeneral.h", + substitutions = { + "cmakedefine": "define", + }, +) + +template_rule( + name = "aten_src_THC_THCGeneral", + src = "aten/src/THC/THCGeneral.h.in", + out = "aten/src/THC/THCGeneral.h", 
+ substitutions = { + "#cmakedefine USE_MAGMA": "", + }, +) + +cc_library( + name = "aten_headers", + hdrs = [ + "aten/src/TH/THGeneral.h", + "aten/src/THC/THCGeneral.h", + "torch/csrc/WindowsTorchApiMacro.h", + "torch/csrc/jit/frontend/function_schema_parser.h", + "aten/src/ATen/templates/TensorBody.h", + "aten/src/ATen/templates/TensorMethods.h", + ] + glob([ + "aten/src/ATen/*.h", + "aten/src/ATen/core/*.h", + "aten/src/ATen/core/boxing/*.h", + "aten/src/ATen/core/boxing/impl/*.h", + "aten/src/ATen/core/dispatch/*.h", + "aten/src/ATen/core/op_registration/*.h", + "aten/src/ATen/cpu/*.h", + "aten/src/ATen/cpu/vec256/*.h", + "aten/src/ATen/cuda/*.cuh", + "aten/src/ATen/cuda/*.h", + "aten/src/ATen/cuda/detail/*.cuh", + "aten/src/ATen/cuda/detail/*.h", + "aten/src/ATen/cuda/nvrtc_stub/*.h", + "aten/src/ATen/cudnn/*.h", + "aten/src/ATen/detail/*.h", + "aten/src/ATen/mkl/*.h", + "aten/src/ATen/mkldnn/*.h", + "aten/src/ATen/native/*.h", + "aten/src/ATen/native/cpu/*.h", + "aten/src/ATen/native/cuda/*.cuh", + "aten/src/ATen/native/cuda/*.h", + "aten/src/ATen/native/mkldnn/*.h", + "aten/src/ATen/native/quantized/*.h", + "aten/src/ATen/native/quantized/cuda/*.h", + "aten/src/ATen/native/quantized/cpu/*.h", + "aten/src/ATen/native/sparse/*.h", + "aten/src/ATen/native/sparse/cuda/*.cuh", + "aten/src/ATen/native/utils/*.h", + "aten/src/ATen/native/xnnpack/*.h", + "aten/src/ATen/quantized/*.h", + "aten/src/TH/*.hpp", + "aten/src/TH/*.h", + "aten/src/TH/vector/*.h", + "aten/src/TH/generic/*.hpp", + "aten/src/TH/generic/*.h", + "aten/src/TH/generic/*.cpp", + "aten/src/TH/generic/*.c", + "aten/src/THC/*.cpp", + "aten/src/THC/*.cuh", + "aten/src/THC/*.h", + "aten/src/THC/*.hpp", + "aten/src/THC/generic/*.cpp", + "aten/src/THC/generic/*.cu.cc", + "aten/src/THC/generic/*.h", + "aten/src/THC/generic/*.hpp", + "aten/src/THCUNN/*.h", + "aten/src/THCUNN/*.cuh", + "aten/src/THCUNN/generic/*.h", + "aten/src/THCUNN/generic/*.cu.cc", + ]) + [ + ":generated_cpp", + ":aten_src_ATen_config", 
+ ], + includes = [ + "aten/src", + "aten/src/TH", + ], + deps = [ + ":c10_headers", + ], +) + +ATEN_COPTS = COMMON_COPTS + [ + "-DUSE_AVX", + "-DUSE_AVX2", + "-DCAFFE2_BUILD_MAIN_LIBS", + "-DHAVE_AVX_CPU_DEFINITION", + "-DHAVE_AVX2_CPU_DEFINITION", + "-fvisibility-inlines-hidden", + "-fno-math-errno", + "-fno-trapping-math", +] + +intern_build_aten_ops( + copts = ATEN_COPTS, + deps = [ + ":aten_headers", + "@fbgemm", + ], +) + +cc_library( + name = "th", + srcs = [ + ":th_srcs", + ], + copts = ATEN_COPTS + [ + "-mavx", + ], + deps = [ + ":aten_headers", + "@fbgemm", + ], +) + +cc_library( + name = "aten", + srcs = [ + ":ATen_CORE_SRCS", + ":ATen_QUANTIZED_SRCS", + ":aten_base_cpp", + ":aten_native_cpp", + ":aten_native_mkl_cpp", + ":aten_native_mkldnn_cpp", + ":aten_native_quantized_cpp", + ":aten_native_sparse_cpp", + ":aten_native_xnnpack", + ":aten_src_ATen_config", + ":generated_cpp", + ], + copts = ATEN_COPTS, + data = if_cuda( + [":libcaffe2_nvrtc.so"], + [], + ), + visibility = ["//visibility:public"], + deps = [ + ":ATen_CPU", + ":aten_headers", + ":caffe2_for_aten_headers", + ":th", + ":torch_headers", + "@fbgemm", + "@ideep", + ], + alwayslink = True, +) + +cc_library( + name = "aten_nvrtc", + srcs = glob([ + "aten/src/ATen/cuda/nvrtc_stub/*.cpp", + ]), + copts = ATEN_COPTS, + linkstatic = True, + visibility = ["//visibility:public"], + deps = [ + ":aten_headers", + ":c10_headers", + "@cuda", + "@cuda//:cuda_driver", + "@cuda//:nvrtc", + ], + alwayslink = True, +) + +cc_binary( + name = "libcaffe2_nvrtc.so", + linkshared = True, + visibility = ["//visibility:public"], + deps = [ + ":aten_nvrtc", + ], +) + +cc_library( + name = "aten_cuda_cpp", + srcs = [":aten_cuda_srcs"], + copts = ATEN_COPTS, + visibility = ["//visibility:public"], + deps = [ + ":aten", + "@cuda", + "@cuda//:nvrtc", + "@cudnn", + ], + alwayslink = True, +) + +torch_cuda_half_options = [ + "-DCUDA_HAS_FP16=1", + "-D__CUDA_NO_HALF_OPERATORS__", + "-D__CUDA_NO_HALF_CONVERSIONS__", + 
"-D__CUDA_NO_HALF2_OPERATORS__", +] + +cu_library( + name = "aten_cuda", + srcs = [ + ":aten_srcs_cu", + ":thc_srcs_cu", + ":thcunn_srcs_cu", + ], + copts = ATEN_COPTS + torch_cuda_half_options, + visibility = ["//visibility:public"], + deps = [ + ":aten_cuda_cpp", + "@cuda//:cublas", + "@cuda//:cufft", + "@cuda//:cusparse", + ], + alwayslink = True, +) + +# caffe2 +CAFFE2_COPTS = COMMON_COPTS + [ + "-Dcaffe2_EXPORTS", + "-DCAFFE2_USE_GLOO", + "-DCAFFE2_USE_CUDNN", + "-DCAFFE2_BUILD_MAIN_LIB", + "-fvisibility-inlines-hidden", + "-fno-math-errno", + "-fno-trapping-math", +] + +proto_library( + name = "caffe2_proto_source", + srcs = glob([ + "caffe2/proto/*.proto", + ]), + visibility = ["//visibility:public"], +) + +cc_proto_library( + name = "caffe2_protos", + deps = [":caffe2_proto_source"], +) + +template_rule( + name = "caffe2_core_macros_h", + src = "caffe2/core/macros.h.in", + out = "caffe2/core/macros.h", + substitutions = { + "@CAFFE2_VERSION_MAJOR@": "1", + "@CAFFE2_VERSION_MINOR@": "3", + "@CAFFE2_VERSION_PATCH@": "0", + "cmakedefine": "define", + "#define CAFFE2_FORCE_FALLBACK_CUDA_MPI": "/* #undef CAFFE2_FORCE_FALLBACK_CUDA_MPI */", + "#define CAFFE2_HAS_MKL_DNN": "/* #undef CAFFE2_HAS_MKL_DNN */", + "#define CAFFE2_HAS_MKL_SGEMM_PACK": "/* #undef CAFFE2_HAS_MKL_SGEMM_PACK */", + "#define CAFFE2_THREADPOOL_MAIN_IMBALANCE": "/* #undef CAFFE2_THREADPOOL_MAIN_IMBALANCE */", + "#define CAFFE2_THREADPOOL_STATS": "/* #undef CAFFE2_THREADPOOL_STATS */", + "#define CAFFE2_USE_ACCELERATE": "/* #undef CAFFE2_USE_ACCELERATE */", + "#define CAFFE2_USE_EIGEN_FOR_BLAS": "/* #undef CAFFE2_USE_EIGEN_FOR_BLAS */", + "#define CAFFE2_USE_FBCODE": "/* #undef CAFFE2_USE_FBCODE */", + "#define CAFFE2_USE_GOOGLE_GLOG": "/* #undef CAFFE2_USE_GOOGLE_GLOG */", + "#define CAFFE2_USE_LITE_PROTO": "/* #undef CAFFE2_USE_LITE_PROTO */", + "#define CAFFE2_USE_MKL\n": "/* #undef CAFFE2_USE_MKL */\n", + "#define CAFFE2_USE_NVTX": "/* #undef CAFFE2_USE_NVTX */", + "#define CAFFE2_USE_TRT": 
"/* #undef CAFFE2_USE_TRT */", + }, +) + +filegroup( + name = "caffe2_contrib_srcs", + srcs = [ + "caffe2/contrib/gloo/allgather_ops.cc", + "caffe2/contrib/gloo/allreduce_ops.cc", + "caffe2/contrib/gloo/barrier_ops.cc", + "caffe2/contrib/gloo/broadcast_ops.cc", + "caffe2/contrib/gloo/common.cc", + "caffe2/contrib/gloo/common_world_ops.cc", + "caffe2/contrib/gloo/context.cc", + "caffe2/contrib/gloo/reduce_scatter_ops.cc", + "caffe2/contrib/gloo/store_handler.cc", + ], +) + +filegroup( + name = "caffe2_core_srcs", + srcs = [ + "caffe2/core/allocator.cc", + "caffe2/core/blob_serialization.cc", + "caffe2/core/blob_stats.cc", + "caffe2/core/common.cc", + "caffe2/core/context.cc", + "caffe2/core/context_base.cc", + "caffe2/core/db.cc", + "caffe2/core/event.cc", + "caffe2/core/export_c10_op_to_caffe2.cc", + "caffe2/core/graph.cc", + "caffe2/core/init.cc", + "caffe2/core/init_denormals.cc", + "caffe2/core/init_intrinsics_check.cc", + "caffe2/core/init_omp.cc", + "caffe2/core/int8_serialization.cc", + "caffe2/core/memonger.cc", + "caffe2/core/module.cc", + "caffe2/core/net.cc", + "caffe2/core/net_async_base.cc", + "caffe2/core/net_async_scheduling.cc", + "caffe2/core/net_async_task.cc", + "caffe2/core/net_async_task_future.cc", + "caffe2/core/net_async_task_graph.cc", + "caffe2/core/net_async_tracing.cc", + "caffe2/core/net_dag_utils.cc", + "caffe2/core/net_parallel.cc", + "caffe2/core/net_simple.cc", + "caffe2/core/net_simple_refcount.cc", + "caffe2/core/nomnigraph/Representations/NeuralNet.cc", + "caffe2/core/nomnigraph/tests/test_util.cc", + "caffe2/core/numa.cc", + "caffe2/core/operator.cc", + "caffe2/core/operator_schema.cc", + "caffe2/core/plan_executor.cc", + "caffe2/core/prof_dag_counters.cc", + "caffe2/core/qtensor.cc", + "caffe2/core/qtensor_serialization.cc", + "caffe2/core/stats.cc", + "caffe2/core/tensor.cc", + "caffe2/core/tensor_int8.cc", + "caffe2/core/test_utils.cc", + "caffe2/core/transform.cc", + "caffe2/core/types.cc", + "caffe2/core/workspace.cc", + ], 
+) + +filegroup( + name = "caffe2_distributed_srcs", + srcs = [ + "caffe2/distributed/file_store_handler.cc", + "caffe2/distributed/file_store_handler_op.cc", + "caffe2/distributed/store_handler.cc", + "caffe2/distributed/store_ops.cc", + ], +) + +filegroup( + name = "caffe2_ideep_srcs", + srcs = [ + "caffe2/ideep/operators/adam_op.cc", + "caffe2/ideep/operators/channel_shuffle_op.cc", + "caffe2/ideep/operators/concat_split_op.cc", + "caffe2/ideep/operators/conv_op.cc", + "caffe2/ideep/operators/conv_transpose_op.cc", + "caffe2/ideep/operators/dropout_op.cc", + "caffe2/ideep/operators/elementwise_sum_op.cc", + "caffe2/ideep/operators/expand_squeeze_dims_op.cc", + "caffe2/ideep/operators/fully_connected_op.cc", + "caffe2/ideep/operators/local_response_normalization_op.cc", + "caffe2/ideep/operators/momentum_sgd_op.cc", + "caffe2/ideep/operators/operator_fallback_ideep.cc", + "caffe2/ideep/operators/order_switch_ops.cc", + "caffe2/ideep/operators/pool_op.cc", + "caffe2/ideep/operators/quantization/int8_add_op.cc", + "caffe2/ideep/operators/quantization/int8_conv_op.cc", + "caffe2/ideep/operators/quantization/int8_dequantize_op.cc", + "caffe2/ideep/operators/quantization/int8_fully_connected_op.cc", + "caffe2/ideep/operators/quantization/int8_given_tensor_fill_op.cc", + "caffe2/ideep/operators/quantization/int8_pool_op.cc", + "caffe2/ideep/operators/quantization/int8_quantize_op.cc", + "caffe2/ideep/operators/quantization/int8_relu_op.cc", + "caffe2/ideep/operators/queue_ops.cc", + "caffe2/ideep/operators/relu_op.cc", + "caffe2/ideep/operators/reshape_op.cc", + "caffe2/ideep/operators/shape_op.cc", + "caffe2/ideep/operators/sigmoid_op.cc", + "caffe2/ideep/operators/spatial_batch_norm_op.cc", + "caffe2/ideep/operators/transpose_op.cc", + "caffe2/ideep/operators/utility_ops.cc", + "caffe2/ideep/utils/ideep_register.cc", + ], +) + +filegroup( + name = "caffe2_onnx_srcs", + srcs = [ + "caffe2/onnx/backend.cc", + "caffe2/onnx/backend_rep.cc", + "caffe2/onnx/device.cc", + 
"caffe2/onnx/helper.cc", + "caffe2/onnx/offline_tensor.cc", + "caffe2/onnx/onnx_exporter.cc", + "caffe2/onnx/onnxifi_graph_info.cc", + "caffe2/onnx/onnxifi_init.cc", + ], +) + +filegroup( + name = "caffe2_operators_srcs", + srcs = [ + "caffe2/operators/abs_op.cc", + "caffe2/operators/accumulate_op.cc", + "caffe2/operators/accuracy_op.cc", + "caffe2/operators/acos_op.cc", + "caffe2/operators/affine_channel_op.cc", + "caffe2/operators/alias_with_name.cc", + "caffe2/operators/apmeter_op.cc", + "caffe2/operators/arg_ops.cc", + "caffe2/operators/asin_op.cc", + "caffe2/operators/assert_op.cc", + "caffe2/operators/atan_op.cc", + "caffe2/operators/atomic_ops.cc", + "caffe2/operators/batch_box_cox_op.cc", + "caffe2/operators/batch_bucketize_op.cc", + "caffe2/operators/batch_gather_ops.cc", + "caffe2/operators/batch_matmul_op.cc", + "caffe2/operators/batch_moments_op.cc", + "caffe2/operators/batch_permutation_op.cc", + "caffe2/operators/batch_sparse_to_dense_op.cc", + "caffe2/operators/bbox_transform_op.cc", + "caffe2/operators/bisect_percentile_op.cc", + "caffe2/operators/boolean_mask_ops.cc", + "caffe2/operators/boolean_unmask_ops.cc", + "caffe2/operators/box_with_nms_limit_op.cc", + "caffe2/operators/bucketize_op.cc", + "caffe2/operators/byte_weight_dequant_op.cc", + "caffe2/operators/cast_op.cc", + "caffe2/operators/cbrt_op.cc", + "caffe2/operators/cc_bmm_bg_op.cc", + "caffe2/operators/ceil_op.cc", + "caffe2/operators/channel_backprop_stats_op.cc", + "caffe2/operators/channel_shuffle_op.cc", + "caffe2/operators/channel_stats_op.cc", + "caffe2/operators/clip_op.cc", + "caffe2/operators/collect_and_distribute_fpn_rpn_proposals_op.cc", + "caffe2/operators/communicator_op.cc", + "caffe2/operators/concat_split_op.cc", + "caffe2/operators/conditional_op.cc", + "caffe2/operators/conv_gradient_op.cc", + "caffe2/operators/conv_op.cc", + "caffe2/operators/conv_op_eigen.cc", + "caffe2/operators/conv_op_shared.cc", + "caffe2/operators/conv_transpose_gradient_op.cc", + 
"caffe2/operators/conv_transpose_op_mobile.cc", + "caffe2/operators/copy_op.cc", + "caffe2/operators/copy_rows_to_tensor_op.cc", + "caffe2/operators/cos_op.cc", + "caffe2/operators/cosh_op.cc", + "caffe2/operators/cosine_embedding_criterion_op.cc", + "caffe2/operators/counter_ops.cc", + "caffe2/operators/crash_op.cc", + "caffe2/operators/create_scope_op.cc", + "caffe2/operators/crf_viterbi_op.cc", + "caffe2/operators/cross_entropy_op.cc", + "caffe2/operators/ctc_beam_search_decoder_op.cc", + "caffe2/operators/ctc_greedy_decoder_op.cc", + "caffe2/operators/cube_op.cc", + "caffe2/operators/data_couple.cc", + "caffe2/operators/dataset_ops.cc", + "caffe2/operators/deform_conv_gradient_op.cc", + "caffe2/operators/deform_conv_op.cc", + "caffe2/operators/dense_vector_to_id_list_op.cc", + "caffe2/operators/distance_op.cc", + "caffe2/operators/do_op.cc", + "caffe2/operators/dropout_op.cc", + "caffe2/operators/elementwise_add_gradient_op.cc", + "caffe2/operators/elementwise_add_op.cc", + "caffe2/operators/elementwise_div_gradient_op.cc", + "caffe2/operators/elementwise_div_op.cc", + "caffe2/operators/elementwise_linear_op.cc", + "caffe2/operators/elementwise_logical_ops.cc", + "caffe2/operators/elementwise_mul_gradient_op.cc", + "caffe2/operators/elementwise_mul_op.cc", + "caffe2/operators/elementwise_ops.cc", + "caffe2/operators/elementwise_ops_schema.cc", + "caffe2/operators/elementwise_ops_utils.cc", + "caffe2/operators/elementwise_sub_gradient_op.cc", + "caffe2/operators/elementwise_sub_op.cc", + "caffe2/operators/elementwise_sum_op.cc", + "caffe2/operators/elu_op.cc", + "caffe2/operators/enforce_finite_op.cc", + "caffe2/operators/ensure_clipped_op.cc", + "caffe2/operators/ensure_cpu_output_op.cc", + "caffe2/operators/erf_op.cc", + "caffe2/operators/exp_op.cc", + "caffe2/operators/expand_op.cc", + "caffe2/operators/expand_squeeze_dims_op.cc", + "caffe2/operators/fc_inference.cc", + "caffe2/operators/feature_maps_ops.cc", + "caffe2/operators/feed_blob_op.cc", + 
"caffe2/operators/filler_op.cc", + "caffe2/operators/find_duplicate_elements_op.cc", + "caffe2/operators/find_op.cc", + "caffe2/operators/flatten_op.cc", + "caffe2/operators/flexible_top_k.cc", + "caffe2/operators/floor_op.cc", + "caffe2/operators/free_op.cc", + "caffe2/operators/fully_connected_op.cc", + "caffe2/operators/fused_rowwise_8bit_conversion_ops.cc", + "caffe2/operators/fused_rowwise_random_quantization_ops.cc", + "caffe2/operators/gather_fused_8bit_rowwise_op.cc", + "caffe2/operators/gather_op.cc", + "caffe2/operators/gather_ranges_to_dense_op.cc", + "caffe2/operators/gelu_op.cc", + "caffe2/operators/generate_proposals_op.cc", + "caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.cc", + "caffe2/operators/given_tensor_fill_op.cc", + "caffe2/operators/glu_op.cc", + "caffe2/operators/group_norm_op.cc", + "caffe2/operators/gru_unit_op.cc", + "caffe2/operators/h_softmax_op.cc", + "caffe2/operators/half_float_ops.cc", + "caffe2/operators/hard_sigmoid_op.cc", + "caffe2/operators/heatmap_max_keypoint_op.cc", + "caffe2/operators/if_op.cc", + "caffe2/operators/im2col_op.cc", + "caffe2/operators/index_hash_ops.cc", + "caffe2/operators/index_ops.cc", + "caffe2/operators/inference_lstm_op.cc", + "caffe2/operators/instance_norm_gradient_op.cc", + "caffe2/operators/instance_norm_op.cc", + "caffe2/operators/integral_image_op.cc", + "caffe2/operators/is_empty_op.cc", + "caffe2/operators/jsd_op.cc", + "caffe2/operators/key_split_ops.cc", + "caffe2/operators/last_n_window_collector.cc", + "caffe2/operators/layer_norm_op.cc", + "caffe2/operators/leaky_relu_op.cc", + "caffe2/operators/length_split_op.cc", + "caffe2/operators/lengths_pad_op.cc", + "caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.cc", + "caffe2/operators/lengths_reducer_ops.cc", + "caffe2/operators/lengths_reducer_rowwise_8bit_ops.cc", + "caffe2/operators/lengths_tile_op.cc", + "caffe2/operators/lengths_top_k_op.cc", + "caffe2/operators/listwise_l2r_op.cc", + 
"caffe2/operators/load_save_op.cc", + "caffe2/operators/load_save_op_util.cc", + "caffe2/operators/local_response_normalization_op.cc", + "caffe2/operators/locally_connected_op.cc", + "caffe2/operators/locally_connected_op_util.cc", + "caffe2/operators/log_op.cc", + "caffe2/operators/logit_op.cc", + "caffe2/operators/loss_op.cc", + "caffe2/operators/lp_pool_op.cc", + "caffe2/operators/lpnorm_op.cc", + "caffe2/operators/lstm_unit_op.cc", + "caffe2/operators/map_ops.cc", + "caffe2/operators/margin_ranking_criterion_op.cc", + "caffe2/operators/matmul_op.cc", + "caffe2/operators/mean_op.cc", + "caffe2/operators/merge_id_lists_op.cc", + "caffe2/operators/minmax_gradient_ops.cc", + "caffe2/operators/minmax_ops.cc", + "caffe2/operators/mod_op.cc", + "caffe2/operators/moments_op.cc", + "caffe2/operators/multi_class_accuracy_op.cc", + "caffe2/operators/negate_gradient_op.cc", + "caffe2/operators/negative_op.cc", + "caffe2/operators/ngram_ops.cc", + "caffe2/operators/norm_planar_yuv_op.cc", + "caffe2/operators/normalize_l1_op.cc", + "caffe2/operators/normalize_op.cc", + "caffe2/operators/numpy_tile_op.cc", + "caffe2/operators/one_hot_ops.cc", + "caffe2/operators/onnx_while_op.cc", + "caffe2/operators/order_switch_ops.cc", + "caffe2/operators/pack_rnn_sequence_op.cc", + "caffe2/operators/pack_segments.cc", + "caffe2/operators/pad_op.cc", + "caffe2/operators/partition_ops.cc", + "caffe2/operators/percentile_op.cc", + "caffe2/operators/perplexity_op.cc", + "caffe2/operators/piecewise_linear_transform_op.cc", + "caffe2/operators/pool_gradient_op.cc", + "caffe2/operators/pool_op.cc", + "caffe2/operators/pool_op_util.cc", + "caffe2/operators/pow_op.cc", + "caffe2/operators/prelu_op.cc", + "caffe2/operators/prepend_dim_op.cc", + "caffe2/operators/quant_decode_op.cc", + "caffe2/operators/rank_loss_op.cc", + "caffe2/operators/reciprocal_gradient_op.cc", + "caffe2/operators/reciprocal_op.cc", + "caffe2/operators/reduce_front_back_max_ops.cc", + 
"caffe2/operators/reduce_front_back_mean_ops.cc", + "caffe2/operators/reduce_front_back_sum_ops.cc", + "caffe2/operators/reduce_ops.cc", + "caffe2/operators/reduction_ops.cc", + "caffe2/operators/relu_n_op.cc", + "caffe2/operators/relu_op.cc", + "caffe2/operators/remove_data_blocks_op.cc", + "caffe2/operators/replace_nan_op.cc", + "caffe2/operators/reservoir_sampling.cc", + "caffe2/operators/reshape_op.cc", + "caffe2/operators/resize_3d_op.cc", + "caffe2/operators/resize_op.cc", + "caffe2/operators/reverse_packed_segs_op.cc", + "caffe2/operators/rmac_regions_op.cc", + "caffe2/operators/rnn/recurrent_network_blob_fetcher_op.cc", + "caffe2/operators/rnn/recurrent_network_executor.cc", + "caffe2/operators/rnn/recurrent_network_op.cc", + "caffe2/operators/roi_align_gradient_op.cc", + "caffe2/operators/roi_align_op.cc", + "caffe2/operators/roi_align_rotated_gradient_op.cc", + "caffe2/operators/roi_align_rotated_op.cc", + "caffe2/operators/roi_pool_op.cc", + "caffe2/operators/rowmul_op.cc", + "caffe2/operators/rsqrt_op.cc", + "caffe2/operators/scale_blobs_op.cc", + "caffe2/operators/scale_op.cc", + "caffe2/operators/segment_reduction_op.cc", + "caffe2/operators/selu_op.cc", + "caffe2/operators/sequence_ops.cc", + "caffe2/operators/shape_op.cc", + "caffe2/operators/sigmoid_gradient_op.cc", + "caffe2/operators/sigmoid_op.cc", + "caffe2/operators/sin_op.cc", + "caffe2/operators/sinh_op.cc", + "caffe2/operators/sinusoid_position_encoding_op.cc", + "caffe2/operators/slice_op.cc", + "caffe2/operators/softmax_op.cc", + "caffe2/operators/softmax_utils.cc", + "caffe2/operators/softmax_with_loss_op.cc", + "caffe2/operators/softplus_op.cc", + "caffe2/operators/softsign_op.cc", + "caffe2/operators/space_batch_op.cc", + "caffe2/operators/sparse_dropout_with_replacement_op.cc", + "caffe2/operators/sparse_normalize_op.cc", + "caffe2/operators/sparse_to_dense_mask_op.cc", + "caffe2/operators/sparse_to_dense_op.cc", + "caffe2/operators/spatial_batch_norm_gradient_op.cc", + 
"caffe2/operators/spatial_batch_norm_op.cc", + "caffe2/operators/spatial_softmax_with_loss_op.cc", + "caffe2/operators/sqr_op.cc", + "caffe2/operators/sqrt_op.cc", + "caffe2/operators/square_root_divide_op.cc", + "caffe2/operators/stats_ops.cc", + "caffe2/operators/stats_put_ops.cc", + "caffe2/operators/stop_gradient.cc", + "caffe2/operators/string_ops.cc", + "caffe2/operators/stump_func_op.cc", + "caffe2/operators/stylizer_ops.cc", + "caffe2/operators/summarize_op.cc", + "caffe2/operators/swish_op.cc", + "caffe2/operators/tan_op.cc", + "caffe2/operators/tanh_gradient_op.cc", + "caffe2/operators/tanh_op.cc", + "caffe2/operators/tensor_protos_db_input.cc", + "caffe2/operators/text_file_reader.cc", + "caffe2/operators/text_file_reader_utils.cc", + "caffe2/operators/thresholded_relu_op.cc", + "caffe2/operators/tile_op.cc", + "caffe2/operators/top_k.cc", + "caffe2/operators/transpose_op.cc", + "caffe2/operators/tt_linear_op.cc", + "caffe2/operators/unique_ops.cc", + "caffe2/operators/upsample_op.cc", + "caffe2/operators/utility_ops.cc", + "caffe2/operators/variable_length_sequence_padding.cc", + "caffe2/operators/weighted_multi_sampling_op.cc", + "caffe2/operators/weighted_sample_op.cc", + "caffe2/operators/while_op.cc", + "caffe2/operators/workspace_ops.cc", + "caffe2/operators/zero_gradient_op.cc", + ], +) + +filegroup( + name = "caffe2_opt_srcs", + srcs = [ + "caffe2/opt/annotations.cc", + "caffe2/opt/backend_cutting.cc", + "caffe2/opt/backend_transformer_base.cc", + "caffe2/opt/bound_shape_inferencer.cc", + "caffe2/opt/converter.cc", + "caffe2/opt/dead_code_elim.cc", + "caffe2/opt/device.cc", + "caffe2/opt/distributed.cc", + "caffe2/opt/distributed_converter.cc", + "caffe2/opt/fusion.cc", + "caffe2/opt/mobile.cc", + "caffe2/opt/onnxifi_op.cc", + "caffe2/opt/onnxifi_transformer.cc", + "caffe2/opt/optimize_ideep.cc", + "caffe2/opt/optimizer.cc", + "caffe2/opt/passes.cc", + "caffe2/opt/shape_info.cc", + "caffe2/opt/tvm_transformer.cc", + ], +) + +filegroup( + name = 
"caffe2_perfkernels_srcs", + srcs = [ + "caffe2/perfkernels/adagrad.cc", + "caffe2/perfkernels/embedding_lookup.cc", + "caffe2/perfkernels/embedding_lookup_idx.cc", + "caffe2/perfkernels/fused_8bit_rowwise_conversion.cc", + "caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.cc", + "caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup_idx.cc", + "caffe2/perfkernels/lstm_unit_cpu_common.cc", + "caffe2/perfkernels/math_cpu_base.cc", + "caffe2/perfkernels/typed_axpy.cc", + ], +) + +filegroup( + name = "caffe2_predictor_srcs", + srcs = [ + "caffe2/predictor/emulator/data_filler.cc", + "caffe2/predictor/emulator/data_filler.h", + "caffe2/predictor/predictor.cc", + "caffe2/predictor/predictor_config.cc", + "caffe2/predictor/predictor_utils.cc", + ], +) + +filegroup( + name = "caffe2_quantization_srcs", + srcs = [ + "caffe2/quantization/server/activation_distribution_observer.cc", + "caffe2/quantization/server/batch_matmul_dnnlowp_op.cc", + "caffe2/quantization/server/caffe2_dnnlowp_utils.cc", + "caffe2/quantization/server/channel_shuffle_dnnlowp_op.cc", + "caffe2/quantization/server/concat_dnnlowp_op.cc", + "caffe2/quantization/server/conv_dnnlowp_acc16_op.cc", + "caffe2/quantization/server/conv_dnnlowp_op.cc", + "caffe2/quantization/server/conv_relu_op.cc", + "caffe2/quantization/server/dequantize_dnnlowp_op.cc", + "caffe2/quantization/server/dnnlowp.cc", + "caffe2/quantization/server/dnnlowp_partition.cc", + "caffe2/quantization/server/dynamic_histogram.cc", + "caffe2/quantization/server/elementwise_add_dnnlowp_op.cc", + "caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc", + "caffe2/quantization/server/elementwise_mul_dnnlowp_op.cc", + "caffe2/quantization/server/elementwise_sum_dnnlowp_op.cc", + "caffe2/quantization/server/elementwise_sum_relu_op.cc", + "caffe2/quantization/server/fbgemm_pack_matrix_cache.cc", + "caffe2/quantization/server/fbgemm_pack_op.cc", + "caffe2/quantization/server/fully_connected_dnnlowp_acc16_op.cc", + 
"caffe2/quantization/server/fully_connected_dnnlowp_op.cc", + "caffe2/quantization/server/fully_connected_fake_lowp_op.cc", + "caffe2/quantization/server/group_norm_dnnlowp_op.cc", + "caffe2/quantization/server/kl_minimization.cc", + "caffe2/quantization/server/lstm_unit_dnnlowp_op.cc", + "caffe2/quantization/server/norm_minimization.cc", + "caffe2/quantization/server/p99.cc", + "caffe2/quantization/server/pool_dnnlowp_op.cc", + "caffe2/quantization/server/quantize_dnnlowp_op.cc", + "caffe2/quantization/server/relu_dnnlowp_op.cc", + "caffe2/quantization/server/sigmoid.cc", + "caffe2/quantization/server/sigmoid_dnnlowp_op.cc", + "caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc", + "caffe2/quantization/server/tanh.cc", + "caffe2/quantization/server/tanh_dnnlowp_op.cc", + "caffe2/quantization/server/utility_dnnlowp_ops.cc", + ], +) + +filegroup( + name = "caffe2_queue_srcs", + srcs = [ + "caffe2/queue/blobs_queue.cc", + "caffe2/queue/blobs_queue_db.cc", + "caffe2/queue/queue_ops.cc", + "caffe2/queue/rebatching_queue.cc", + "caffe2/queue/rebatching_queue_ops.cc", + ], +) + +filegroup( + name = "caffe2_serialize_srcs", + srcs = [ + "caffe2/serialize/file_adapter.cc", + "caffe2/serialize/inline_container.cc", + "caffe2/serialize/istream_adapter.cc", + "caffe2/serialize/read_adapter_interface.cc", + ], +) + +filegroup( + name = "caffe2_sgd_srcs", + srcs = [ + "caffe2/sgd/adadelta_op.cc", + "caffe2/sgd/adagrad_op.cc", + "caffe2/sgd/adam_op.cc", + "caffe2/sgd/clip_tensor_op.cc", + "caffe2/sgd/ftrl_op.cc", + "caffe2/sgd/gftrl_op.cc", + "caffe2/sgd/iter_op.cc", + "caffe2/sgd/lars_op.cc", + "caffe2/sgd/learning_rate_adaption_op.cc", + "caffe2/sgd/learning_rate_op.cc", + "caffe2/sgd/momentum_sgd_op.cc", + "caffe2/sgd/rmsprop_op.cc", + "caffe2/sgd/wngrad_op.cc", + "caffe2/sgd/yellowfin_op.cc", + ], +) + +filegroup( + name = "caffe2_transforms_srcs", + srcs = [ + "caffe2/transforms/common_subexpression_elimination.cc", + 
"caffe2/transforms/conv_to_nnpack_transform.cc", + "caffe2/transforms/pattern_net_transform.cc", + "caffe2/transforms/single_op_transform.cc", + ], +) + +filegroup( + name = "caffe2_utils_srcs", + srcs = [ + "caffe2/utils/bench_utils.cc", + "caffe2/utils/cpuid.cc", + "caffe2/utils/math/broadcast.cc", + "caffe2/utils/math/elementwise.cc", + "caffe2/utils/math/reduce.cc", + "caffe2/utils/math/transpose.cc", + "caffe2/utils/math/utils.cc", + "caffe2/utils/math_cpu.cc", + "caffe2/utils/murmur_hash3.cc", + "caffe2/utils/proto_convert.cc", + "caffe2/utils/proto_utils.cc", + "caffe2/utils/proto_wrap.cc", + "caffe2/utils/signal_handler.cc", + "caffe2/utils/smart_tensor_printer.cc", + "caffe2/utils/string_utils.cc", + "caffe2/utils/threadpool/ThreadPool.cc", + "caffe2/utils/threadpool/ThreadPoolMobile.cc", + "caffe2/utils/threadpool/pthreadpool.cc", + "caffe2/utils/threadpool/pthreadpool_impl.cc", + ], +) + +filegroup( + name = "caffe2_cuda_srcs", + srcs = [ + "caffe2/contrib/aten/aten_op_gpu.cc", + "caffe2/contrib/gloo/allreduce_ops_gpu.cc", + "caffe2/contrib/gloo/broadcast_ops_gpu.cc", + "caffe2/contrib/gloo/common_world_ops_gpu.cc", + "caffe2/core/blob_serialization_gpu.cc", + "caffe2/core/common_cudnn.cc", + "caffe2/core/common_gpu.cc", + "caffe2/core/event_gpu.cc", + "caffe2/db/create_db_op_gpu.cc", + "caffe2/distributed/file_store_handler_op_gpu.cc", + "caffe2/operators/communicator_op_gpu.cc", + "caffe2/operators/concat_split_op_gpu.cc", + "caffe2/operators/conv_op_cache_cudnn.cc", + "caffe2/operators/conv_op_cudnn.cc", + "caffe2/operators/conv_op_gpu.cc", + "caffe2/operators/conv_op_shared_gpu.cc", + "caffe2/operators/conv_transpose_op_cudnn.cc", + "caffe2/operators/conv_transpose_op_gpu.cc", + "caffe2/operators/counter_ops_gpu.cc", + "caffe2/operators/do_op_gpu.cc", + "caffe2/operators/dropout_op_cudnn.cc", + "caffe2/operators/elementwise_add_op_gpu.cc", + "caffe2/operators/elementwise_sub_op_gpu.cc", + "caffe2/operators/elu_op_cudnn.cc", + 
"caffe2/operators/exp_op_gpu.cc", + "caffe2/operators/expand_op_gpu.cc", + "caffe2/operators/expand_squeeze_dims_op_gpu.cc", + "caffe2/operators/free_op_gpu.cc", + "caffe2/operators/fully_connected_op_gpu.cc", + "caffe2/operators/if_op_gpu.cc", + "caffe2/operators/im2col_op_gpu.cc", + "caffe2/operators/load_save_op_gpu.cc", + "caffe2/operators/local_response_normalization_op_cudnn.cc", + "caffe2/operators/locally_connected_op_gpu.cc", + "caffe2/operators/log_op_gpu.cc", + "caffe2/operators/matmul_op_gpu.cc", + "caffe2/operators/negate_gradient_op_gpu.cc", + "caffe2/operators/negative_op_gpu.cc", + "caffe2/operators/order_switch_ops_cudnn.cc", + "caffe2/operators/order_switch_ops_gpu.cc", + "caffe2/operators/pool_op_cudnn.cc", + "caffe2/operators/prepend_dim_op_gpu.cc", + "caffe2/operators/reshape_op_gpu.cc", + "caffe2/operators/rnn/recurrent_network_blob_fetcher_op_gpu.cc", + "caffe2/operators/rnn/recurrent_network_executor_gpu.cc", + "caffe2/operators/rnn/recurrent_op_cudnn.cc", + "caffe2/operators/scale_op_gpu.cc", + "caffe2/operators/shape_op_gpu.cc", + "caffe2/operators/sigmoid_op_cudnn.cc", + "caffe2/operators/softmax_op_cudnn.cc", + "caffe2/operators/sqr_op_gpu.cc", + "caffe2/operators/sqrt_op_gpu.cc", + "caffe2/operators/stop_gradient_gpu.cc", + "caffe2/operators/tanh_op_cudnn.cc", + "caffe2/operators/tensor_protos_db_input_gpu.cc", + "caffe2/operators/transpose_op_cudnn.cc", + "caffe2/operators/while_op_gpu.cc", + "caffe2/operators/zero_gradient_op_gpu.cc", + "caffe2/queue/queue_ops_gpu.cc", + "caffe2/sgd/iter_op_gpu.cc", + "caffe2/sgd/learning_rate_op_gpu.cc", + ], +) + +filegroup( + name = "caffe2_cu_srcs", + srcs = [ + "caffe2/core/context_gpu.cu.cc", + "caffe2/operators/abs_op.cu.cc", + "caffe2/operators/accumulate_op.cu.cc", + "caffe2/operators/accuracy_op.cu.cc", + "caffe2/operators/acos_op.cu.cc", + "caffe2/operators/affine_channel_op.cu.cc", + "caffe2/operators/alias_with_name.cu.cc", + "caffe2/operators/arg_ops.cu.cc", + 
"caffe2/operators/asin_op.cu.cc", + "caffe2/operators/assert_op.cu.cc", + "caffe2/operators/atan_op.cu.cc", + "caffe2/operators/batch_gather_ops.cu.cc", + "caffe2/operators/batch_matmul_op.cu.cc", + "caffe2/operators/batch_moments_op.cu.cc", + "caffe2/operators/batch_permutation_op.cu.cc", + "caffe2/operators/batch_sparse_to_dense_op.cu.cc", + "caffe2/operators/boolean_mask_ops.cu.cc", + "caffe2/operators/boolean_unmask_ops.cu.cc", + "caffe2/operators/bucketize_op.cu.cc", + "caffe2/operators/cast_op.cu.cc", + "caffe2/operators/cbrt_op.cu.cc", + "caffe2/operators/ceil_op.cu.cc", + "caffe2/operators/channel_backprop_stats_op.cu.cc", + "caffe2/operators/channel_shuffle_op.cu.cc", + "caffe2/operators/channel_stats_op.cu.cc", + "caffe2/operators/channelwise_conv3d_op_cudnn.cu.cc", + "caffe2/operators/clip_op.cu.cc", + "caffe2/operators/copy_op.cu.cc", + "caffe2/operators/cos_op.cu.cc", + "caffe2/operators/cosh_op.cu.cc", + "caffe2/operators/cosine_embedding_criterion_op.cu.cc", + "caffe2/operators/cross_entropy_op.cu.cc", + "caffe2/operators/cube_op.cu.cc", + "caffe2/operators/data_couple_gpu.cu.cc", + "caffe2/operators/deform_conv_op.cu.cc", + "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu.cc", + "caffe2/operators/distance_op.cu.cc", + "caffe2/operators/dropout_op.cu.cc", + "caffe2/operators/elementwise_div_op.cu.cc", + "caffe2/operators/elementwise_linear_op.cu.cc", + "caffe2/operators/elementwise_mul_op.cu.cc", + "caffe2/operators/elementwise_ops.cu.cc", + "caffe2/operators/elu_op.cu.cc", + "caffe2/operators/enforce_finite_op.cu.cc", + "caffe2/operators/ensure_cpu_output_op.cu.cc", + "caffe2/operators/erf_op.cu.cc", + "caffe2/operators/filler_op.cu.cc", + "caffe2/operators/find_op.cu.cc", + "caffe2/operators/floor_op.cu.cc", + "caffe2/operators/gather_op.cu.cc", + "caffe2/operators/gelu_op.cu.cc", + "caffe2/operators/generate_proposals_op.cu.cc", + "caffe2/operators/generate_proposals_op_util_nms_gpu.cu.cc", + 
"caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.cu.cc", + "caffe2/operators/given_tensor_fill_op.cu.cc", + "caffe2/operators/glu_op.cu.cc", + "caffe2/operators/group_norm_op.cu.cc", + "caffe2/operators/gru_unit_op_gpu.cu.cc", + "caffe2/operators/half_float_ops.cu.cc", + "caffe2/operators/hard_sigmoid_op.cu.cc", + "caffe2/operators/instance_norm_op.cu.cc", + "caffe2/operators/integral_image_op.cu.cc", + "caffe2/operators/layer_norm_op.cu.cc", + "caffe2/operators/leaky_relu_op.cu.cc", + "caffe2/operators/lengths_pad_op.cu.cc", + "caffe2/operators/lengths_tile_op.cu.cc", + "caffe2/operators/local_response_normalization_op.cu.cc", + "caffe2/operators/logit_op.cu.cc", + "caffe2/operators/loss_op.cu.cc", + "caffe2/operators/lp_pool_op.cu.cc", + "caffe2/operators/lstm_unit_op_gpu.cu.cc", + "caffe2/operators/margin_ranking_criterion_op.cu.cc", + "caffe2/operators/max_pool_with_index.cu.cc", + "caffe2/operators/mean_op.cu.cc", + "caffe2/operators/mem_query_op.cu.cc", + "caffe2/operators/minmax_ops.cu.cc", + "caffe2/operators/moments_op.cu.cc", + "caffe2/operators/multi_class_accuracy_op.cu.cc", + "caffe2/operators/normalize_ops.cu.cc", + "caffe2/operators/one_hot_ops.cu.cc", + "caffe2/operators/pack_segments.cu.cc", + "caffe2/operators/pad_op_gpu.cu.cc", + "caffe2/operators/perplexity_op.cu.cc", + "caffe2/operators/piecewise_linear_transform_op.cu.cc", + "caffe2/operators/pool_op.cu.cc", + "caffe2/operators/pow_op.cu.cc", + "caffe2/operators/prelu_op.cu.cc", + "caffe2/operators/reciprocal_op.cu.cc", + "caffe2/operators/reduce_front_back_max_ops.cu.cc", + "caffe2/operators/reduce_front_back_sum_mean_ops.cu.cc", + "caffe2/operators/reduce_ops.cu.cc", + "caffe2/operators/reduction_ops.cu.cc", + "caffe2/operators/relu_n_op.cu.cc", + "caffe2/operators/relu_op.cu.cc", + "caffe2/operators/replace_nan_op.cu.cc", + "caffe2/operators/resize_3d_op.cu.cc", + "caffe2/operators/resize_op.cu.cc", + "caffe2/operators/reverse_packed_segs_op.cu.cc", + 
"caffe2/operators/rmac_regions_op.cu.cc", + "caffe2/operators/rnn/recurrent_network_op_gpu.cu.cc", + "caffe2/operators/roi_align_gradient_op.cu.cc", + "caffe2/operators/roi_align_op.cu.cc", + "caffe2/operators/roi_align_rotated_gradient_op.cu.cc", + "caffe2/operators/roi_align_rotated_op.cu.cc", + "caffe2/operators/roi_pool_op.cu.cc", + "caffe2/operators/rsqrt_op.cu.cc", + "caffe2/operators/scale_blobs_op.cu.cc", + "caffe2/operators/segment_reduction_op_gpu.cu.cc", + "caffe2/operators/selu_op.cu.cc", + "caffe2/operators/sequence_ops.cu.cc", + "caffe2/operators/sigmoid_op.cu.cc", + "caffe2/operators/sin_op.cu.cc", + "caffe2/operators/sinh_op.cu.cc", + "caffe2/operators/slice_op.cu.cc", + "caffe2/operators/softmax_ops.cu.cc", + "caffe2/operators/softplus_op.cu.cc", + "caffe2/operators/softsign_op.cu.cc", + "caffe2/operators/space_batch_op_gpu.cu.cc", + "caffe2/operators/sparse_normalize_op_gpu.cu.cc", + "caffe2/operators/sparse_to_dense_op.cu.cc", + "caffe2/operators/spatial_batch_norm_op.cu.cc", + "caffe2/operators/spatial_batch_norm_op_cudnn.cu.cc", + "caffe2/operators/stump_func_op.cu.cc", + "caffe2/operators/summarize_op.cu.cc", + "caffe2/operators/swish_op.cu.cc", + "caffe2/operators/tan_op.cu.cc", + "caffe2/operators/tanh_op.cu.cc", + "caffe2/operators/thresholded_relu_op.cu.cc", + "caffe2/operators/tile_op.cu.cc", + "caffe2/operators/top_k.cu.cc", + "caffe2/operators/transpose_op.cu.cc", + "caffe2/operators/unique_ops.cu.cc", + "caffe2/operators/upsample_op.cu.cc", + "caffe2/operators/utility_ops.cu.cc", + "caffe2/operators/weighted_sample_op.cu.cc", + "caffe2/sgd/adadelta_op_gpu.cu.cc", + "caffe2/sgd/adagrad_op_gpu.cu.cc", + "caffe2/sgd/adam_op_gpu.cu.cc", + "caffe2/sgd/fp16_momentum_sgd_op.cu.cc", + "caffe2/sgd/fp32_momentum_sgd_op.cu.cc", + "caffe2/sgd/lars_op_gpu.cu.cc", + "caffe2/sgd/momentum_sgd_op_gpu.cu.cc", + "caffe2/sgd/rmsprop_op_gpu.cu.cc", + "caffe2/sgd/yellowfin_op_gpu.cu.cc", + "caffe2/utils/math/broadcast.cu.cc", + 
"caffe2/utils/math/elementwise.cu.cc", + "caffe2/utils/math/reduce.cu.cc", + "caffe2/utils/math/transpose.cu.cc", + "caffe2/utils/math_gpu.cu.cc", + ], +) + +# To achieve finer granularity and make debugging easier, caffe2 is split into three libraries: +# ATen, caffe2 and caffe2_for_aten_headers. The ATen lib groups the source code under the +# aten/ directory, and caffe2 contains most files under the `caffe2/` directory. Since the +# ATen lib and the caffe2 lib would depend on each other, `caffe2_for_aten_headers` is split +# out from `caffe2` to avoid a dependency cycle. +cc_library( + name = "caffe2_for_aten_headers", + hdrs = [ + "caffe2/core/macros.h", + "caffe2/core/common.h", + "caffe2/core/logging.h", + "caffe2/core/types.h", + "caffe2/perfkernels/common.h", + "caffe2/perfkernels/embedding_lookup.h", + "caffe2/perfkernels/embedding_lookup_idx.h", + "caffe2/utils/fixed_divisor.h", + "caffe2/utils/cpuid.h", + ] + glob([ + "caffe2/utils/threadpool/*.h", + "caffe2/proto/*.h", + ]), + copts = CAFFE2_COPTS, + visibility = ["//visibility:public"], + deps = [ + ":c10_headers", + ":caffe2_protos", + ], +) + +cc_library( + name = "caffe2_headers", + hdrs = glob([ + "caffe2/contrib/aten/*.h", + "caffe2/contrib/gloo/*.h", + "caffe2/core/*.h", + "caffe2/core/nomnigraph/include/nomnigraph/Converters/*.h", + "caffe2/core/nomnigraph/include/nomnigraph/Generated/*.h", + "caffe2/core/nomnigraph/include/nomnigraph/Graph/*.h", + "caffe2/core/nomnigraph/include/nomnigraph/Representations/*.h", + "caffe2/core/nomnigraph/include/nomnigraph/Support/*.h", + "caffe2/core/nomnigraph/include/nomnigraph/Transformations/*.h", + "caffe2/core/nomnigraph/tests/*.h", + "caffe2/db/*.h", + "caffe2/distributed/*.h", + "caffe2/ideep/*.h", + "caffe2/ideep/operators/*.h", + "caffe2/ideep/operators/quantization/*.h", + "caffe2/ideep/utils/*.h", + "caffe2/onnx/*.h", + "caffe2/operators/*.h", + "caffe2/operators/experimental/c10/cpu/*.h", + "caffe2/operators/rnn/*.h", + "caffe2/opt/*.h", + "caffe2/perfkernels/*.h",
+ "caffe2/predictor/*.h", + "caffe2/predictor/emulator/*.h", + "caffe2/proto/*.h", + "caffe2/quantization/server/*.h", + "caffe2/queue/*.h", + "caffe2/serialize/*.h", + "caffe2/sgd/*.h", + "caffe2/share/contrib/depthwise/*.h", + "caffe2/transforms/*.h", + "caffe2/utils/*.h", + "caffe2/utils/math/*.h", + "caffe2/utils/threadpool/*.h", + "modules/**/*.h", + ]) + if_cuda(glob([ + "caffe2/**/*.cuh", + "caffe2/image/*.h", + ])), + copts = CAFFE2_COPTS, + includes = [ + "caffe2/contrib/aten", + "caffe2/core/nomnigraph/include", + "third_party/miniz-2.0.8", + ], + visibility = ["//visibility:public"], + deps = [ + ":caffe2_for_aten_headers", + ":caffe2_protos", + ], +) + +cc_library( + name = "caffe2_dnnlowp_avx2_ops", + srcs = [ + "caffe2/quantization/server/elementwise_sum_dnnlowp_op_avx2.cc", + "caffe2/quantization/server/fully_connected_fake_lowp_op_avx2.cc", + "caffe2/quantization/server/group_norm_dnnlowp_op_avx2.cc", + "caffe2/quantization/server/norm_minimization_avx2.cc", + "caffe2/quantization/server/pool_dnnlowp_op_avx2.cc", + "caffe2/quantization/server/relu_dnnlowp_op_avx2.cc", + "caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_avx2.cc", + "caffe2/quantization/server/transpose.cc", + ], + copts = CAFFE2_COPTS + [ + "-mf16c", + "-mavx2", + "-mfma", + "-mxsave", + ], + visibility = ["//visibility:public"], + deps = [ + ":caffe2_headers", + "@fbgemm", + ], + alwayslink = True, +) + +cc_library( + name = "caffe2", + srcs = [ + "caffe2/db/create_db_op.cc", + "caffe2/db/protodb.cc", + "caffe2/share/contrib/depthwise/depthwise3x3_conv_op.cc", + ":caffe2_contrib_srcs", + ":caffe2_core_srcs", + ":caffe2_distributed_srcs", + ":caffe2_ideep_srcs", + ":caffe2_onnx_srcs", + ":caffe2_operators_srcs", + ":caffe2_opt_srcs", + ":caffe2_perfkernels_srcs", + ":caffe2_predictor_srcs", + ":caffe2_quantization_srcs", + ":caffe2_queue_srcs", + ":caffe2_serialize_srcs", + ":caffe2_sgd_srcs", + ":caffe2_transforms_srcs", + ":caffe2_utils_srcs", + ], + copts = CAFFE2_COPTS + 
["-mf16c"], + linkstatic = 1, + visibility = ["//visibility:public"], + deps = [ + ":caffe2_headers", + ":caffe2_dnnlowp_avx2_ops", + ":caffe2_perfkernels_avx", + ":caffe2_perfkernels_avx2", + ":caffe2_perfkernels_avx512", + ":caffe2_protos", + "//third_party/miniz-2.0.8:miniz", + "@com_google_protobuf//:protobuf", + "@eigen", + "@foxi", + "@gloo", + "@onnx", + ] + if_cuda( + [ + ":caffe2_cpp_cuda", + ":aten_cuda", + ], + [":aten"], + ), + alwayslink = True, +) + +cc_library( + name = "caffe2_cpp_cuda", + srcs = [":caffe2_cuda_srcs"], + copts = CAFFE2_COPTS, + visibility = ["//visibility:public"], + deps = [ + ":caffe2_cuda", + ":caffe2_headers", + ], + alwayslink = True, +) + +cu_library( + name = "caffe2_cuda", + srcs = [":caffe2_cu_srcs"], + copts = CAFFE2_COPTS + torch_cuda_half_options, + visibility = ["//visibility:public"], + deps = [ + ":aten", + ":caffe2_headers", + "@cub", + "@cuda//:cublas", + "@cuda//:curand", + "@cudnn", + "@eigen", + "@gloo", + ], + alwayslink = True, +) + +PERF_COPTS = [ + "-DHAVE_GCC_GET_CPUID", + "-DUSE_AVX", + "-DUSE_AVX2", + "-DTH_HAVE_THREAD", + "-DHAVE_AVX_CPU_DEFINITION", + "-DHAVE_AVX2_CPU_DEFINITION", + "-DENABLE_ALIAS=1", + "-DHAVE_MALLOC_USABLE_SIZE=1", + "-DHAVE_MMAP=1", + "-DHAVE_SHM_OPEN=1", + "-DHAVE_SHM_UNLINK=1", + "-DSLEEF_STATIC_LIBS=1", + "-D_FILE_OFFSET_BITS=64", + "-DUSE_FBGEMM", + "-fvisibility-inlines-hidden", + "-Wunused-parameter", + "-fno-math-errno", + "-fno-trapping-math", + "-mf16c", +] + +PERF_HEADERS = glob([ + "caffe2/perfkernels/*.h", + "caffe2/core/*.h", +]) + +cc_library( + name = "caffe2_perfkernels_avx", + srcs = glob([ + "caffe2/perfkernels/*_avx.cc", + ]), + hdrs = PERF_HEADERS, + copts = PERF_COPTS + [ + "-mavx", + ], + visibility = ["//visibility:public"], + deps = [ + ":caffe2_headers", + ":c10", + ], + alwayslink = True, +) + +cc_library( + name = "caffe2_perfkernels_avx2", + srcs = glob([ + "caffe2/perfkernels/*_avx2.cc", + ]), + hdrs = PERF_HEADERS, + copts = PERF_COPTS + [ + "-mavx2", + 
"-mfma", + "-mavx", + ], + visibility = ["//visibility:public"], + deps = [ + ":caffe2_headers", + ":c10", + ], + alwayslink = True, +) + +cc_library( + name = "caffe2_perfkernels_avx512", + srcs = [ + "caffe2/perfkernels/common_avx512.cc", + ], + hdrs = PERF_HEADERS, + copts = PERF_COPTS + [ + "-mavx512f", + "-mavx512dq", + "-mavx512vl", + "-mavx2", + "-mfma", + "-mavx", + ], + visibility = ["//visibility:public"], + deps = [ + ":caffe2_headers", + ":c10", + ], + alwayslink = True, +) + +# torch +cc_library( + name = "torch_headers", + hdrs = if_cuda( + glob( + [ + "torch/csrc/cuda/*.h", + ], + ), + ) + glob( + [ + "torch/*.h", + "torch/csrc/*.h", + "torch/csrc/api/include/torch/*.h", + "torch/csrc/api/include/torch/data/*.h", + "torch/csrc/api/include/torch/data/dataloader/*.h", + "torch/csrc/api/include/torch/data/datasets/*.h", + "torch/csrc/api/include/torch/data/detail/*.h", + "torch/csrc/api/include/torch/data/samplers/*.h", + "torch/csrc/api/include/torch/data/transforms/*.h", + "torch/csrc/api/include/torch/detail/*.h", + "torch/csrc/api/include/torch/nn/*.h", + "torch/csrc/api/include/torch/nn/functional/*.h", + "torch/csrc/api/include/torch/nn/modules/*.h", + "torch/csrc/api/include/torch/nn/modules/container/*.h", + "torch/csrc/api/include/torch/nn/options/*.h", + "torch/csrc/api/include/torch/nn/parallel/*.h", + "torch/csrc/api/include/torch/nn/utils/*.h", + "torch/csrc/api/include/torch/optim/*.h", + "torch/csrc/api/include/torch/python/*.h", + "torch/csrc/api/include/torch/serialize/*.h", + "torch/csrc/autograd/*.h", + "torch/csrc/autograd/functions/*.h", + "torch/csrc/autograd/utils/*.h", + "torch/csrc/distributed/autograd/functions/*.h", + "torch/csrc/distributed/autograd/context/*.h", + "torch/csrc/distributed/autograd/engine/*.h", + "torch/csrc/distributed/autograd/rpc_messages/*.h", + "torch/csrc/distributed/autograd/*.h", + "torch/csrc/distributed/c10d/*.h", + "torch/csrc/distributed/rpc/*.h", + "torch/csrc/generic/*.h", + 
"torch/csrc/generic/*.cpp", + "torch/csrc/jit/*.h", + "torch/csrc/jit/api/*.h", + "torch/csrc/jit/codegen/cuda/*.h", + "torch/csrc/jit/codegen/fuser/*.h", + "torch/csrc/jit/codegen/fuser/cpu/*.h", + "torch/csrc/jit/codegen/fuser/cuda/*.h", + "torch/csrc/jit/ir/*.h", + "torch/csrc/jit/fuser/*.h", + "torch/csrc/jit/fuser/cpu/*.h", + "torch/csrc/jit/fuser/cuda/*.h", + "torch/csrc/jit/passes/*.h", + "torch/csrc/jit/passes/onnx/*.h", + "torch/csrc/jit/passes/utils/*.h", + "torch/csrc/jit/python/*.h", + "torch/csrc/jit/runtime/*.h", + "torch/csrc/jit/frontend/*.h", + "torch/csrc/jit/mobile/*.h", + "torch/csrc/jit/serialization/*.h", + "torch/csrc/jit/tensorexpr/*.h", + "torch/csrc/jit/testing/*.h", + "torch/csrc/multiprocessing/*.h", + "torch/csrc/onnx/*.h", + "torch/csrc/tensor/*.h", + "torch/csrc/utils/*.h", + "torch/lib/libshm/*.h", + "torch/lib/c10d/*.hpp", + ], + exclude = [ + "torch/lib/c10d/ProcessGroupMPI.hpp", + "torch/lib/c10d/ProcessGroupNCCL.hpp", + ], + ) + [":generated_code"], + includes = [ + "torch/csrc", + "torch/csrc/api/include", + "torch/lib", + "torch/lib/libshm", + ], + visibility = ["//visibility:public"], + deps = [ + ":aten_headers", + ":c10_headers", + ":caffe2_headers", + "@onnx", + ], + alwayslink = True, +) + +TORCH_COPTS = COMMON_COPTS + [ + "-Dtorch_EXPORTS", + "-DHAVE_AVX_CPU_DEFINITION", + "-DHAVE_AVX2_CPU_DEFINITION", + "-DCAFFE2_USE_GLOO", + "-fvisibility-inlines-hidden", + "-fno-math-errno", + "-fno-trapping-math", +] + +filegroup( + name = "torch_srcs", + srcs = [ + "torch/csrc/jit/tensorexpr/codegen.cpp", + "torch/csrc/jit/tensorexpr/eval.cpp", + "torch/csrc/jit/tensorexpr/expr.cpp", + "torch/csrc/jit/tensorexpr/function.cpp", + "torch/csrc/jit/tensorexpr/hash_provider.cpp", + "torch/csrc/jit/tensorexpr/ir.cpp", + "torch/csrc/jit/tensorexpr/ir_mutator.cpp", + "torch/csrc/jit/tensorexpr/ir_printer.cpp", + "torch/csrc/jit/tensorexpr/ir_simplifier.cpp", + "torch/csrc/jit/tensorexpr/ir_visitor.cpp", +
"torch/csrc/jit/tensorexpr/kernel.cpp", + "torch/csrc/jit/tensorexpr/llvm_codegen.cpp", + "torch/csrc/jit/tensorexpr/llvm_jit.cpp", + "torch/csrc/jit/tensorexpr/loopnest.cpp", + "torch/csrc/jit/tensorexpr/mem_arena.cpp", + "torch/csrc/jit/tensorexpr/tensor.cpp", + "torch/csrc/jit/tensorexpr/types.cpp", + "torch/csrc/jit/tensorexpr/unique_name_manager.cpp", + ], +) + +cc_library( + name = "torch", + srcs = if_cuda(glob( + [ + "torch/csrc/cuda/*.cpp", + "torch/csrc/autograd/functions/comm.cpp", + "torch/csrc/jit/tensorexpr/*.cpp", + ], + exclude = [ + "torch/csrc/cuda/python_nccl.cpp", + "torch/csrc/cuda/nccl.cpp", + ], + )) + glob( + [ + "torch/csrc/*.cpp", + "torch/csrc/api/src/*.cpp", + "torch/csrc/api/src/data/datasets/*.cpp", + "torch/csrc/api/src/data/samplers/*.cpp", + "torch/csrc/api/src/nn/*.cpp", + "torch/csrc/api/src/nn/modules/*.cpp", + "torch/csrc/api/src/nn/modules/container/*.cpp", + "torch/csrc/api/src/nn/options/*.cpp", + "torch/csrc/api/src/optim/*.cpp", + "torch/csrc/api/src/python/*.cpp", + "torch/csrc/api/src/serialize/*.cpp", + "torch/csrc/autograd/*.cpp", + "torch/csrc/autograd/functions/*.cpp", + "torch/csrc/autograd/generated/*.cpp", + "torch/csrc/distributed/autograd/*.cpp", + "torch/csrc/distributed/autograd/context/*.cpp", + "torch/csrc/distributed/autograd/functions/*.cpp", + "torch/csrc/distributed/autograd/engine/*.cpp", + "torch/csrc/distributed/autograd/rpc_messages/*.cpp", + "torch/csrc/distributed/rpc/*.cpp", + "torch/csrc/jit/*.cpp", + "torch/csrc/jit/api/*.cpp", + "torch/csrc/jit/codegen/fuser/*.cpp", + "torch/csrc/jit/frontend/*.cpp", + "torch/csrc/jit/fuser/*.cpp", + "torch/csrc/jit/fuser/cpu/*.cpp", + "torch/csrc/jit/ir/*.cpp", + "torch/csrc/jit/generated/*.cpp", + "torch/csrc/jit/passes/*.cpp", + "torch/csrc/jit/passes/onnx/*.cpp", + "torch/csrc/jit/passes/utils/*.cpp", + "torch/csrc/jit/mobile/*.cpp", + "torch/csrc/jit/python/*.cpp", + "torch/csrc/jit/runtime/*.cpp", + "torch/csrc/jit/serialization/*.cpp", + 
"torch/csrc/jit/testing/*.cpp", + "torch/csrc/multiprocessing/*.cpp", + "torch/csrc/onnx/*.cpp", + "torch/csrc/tensor/*.cpp", + "torch/csrc/utils/*.cpp", + "torch/lib/libshm/*.cpp", + "torch/lib/c10d/*.cpp", + ], + exclude = glob([ + "torch/csrc/autograd/*_cuda.cpp", + ]) + [ + "torch/csrc/autograd/functions/comm.cpp", + "torch/csrc/autograd/generated/VariableTypeEverything.cpp", + "torch/lib/libshm/manager.cpp", + "torch/lib/c10d/NCCLUtils.cpp", + "torch/lib/c10d/ProcessGroupMPI.cpp", + "torch/lib/c10d/ProcessGroupNCCL.cpp", + ], + ) + [ + "torch/csrc/jit/codegen/cuda/interface.cpp", + ":torch_srcs", + ":generated_code", + ], + copts = TORCH_COPTS + if_cuda(["-DUSE_CUDA=1"]), + defines = [ + "CAFFE2_NIGHTLY_VERSION=20200115", + ], + linkopts = [ + "-Wl,--rpath", + "-Wl,/opt/conda/lib", + "-L/opt/conda/lib", + "-lpython3.6m", + ], + visibility = ["//visibility:public"], + deps = [ + ":caffe2", + ":torch_headers", + "@local_config_python//:python_headers", + "@pybind11", + ], + alwayslink = True, +) + +cc_library( + name = "libtorch_headers", + hdrs = glob([ + "**/*.h", + "**/*.cuh", + ]) + [ + ":generated_code", + ], + includes = [ + ".", + "torch/csrc/api/include", + "torch/lib", + "torch/lib/libshm", + ], + visibility = ["//visibility:public"], + deps = [ + ":aten_headers", + ":c10_headers", + ":caffe2_headers", + ], +) + +# cpp api tests +cc_library( + name = "test_support", + testonly = True, + srcs = [ + "test/cpp/api/support.cpp", + ], + hdrs = [ + "test/cpp/api/support.h", + "test/cpp/common/support.h", + ], + deps = [ + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "any_test", + srcs = ["test/cpp/api/any.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "autograd_test", + srcs = ["test/cpp/api/autograd.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "dataloader_test", + srcs = 
["test/cpp/api/dataloader.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "enum_test", + srcs = ["test/cpp/api/enum.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "expanding_array_test", + srcs = ["test/cpp/api/expanding-array.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "functional_test", + srcs = ["test/cpp/api/functional.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "init_test", + srcs = [ + "test/cpp/api/init.cpp", + "test/cpp/api/init_baseline.h", + ], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +# Torch integration tests rely on a labeled data set from the MNIST database. +# http://yann.lecun.com/exdb/mnist/ +cc_test( + name = "integration_test", + srcs = ["test/cpp/api/integration.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "jit_test", + srcs = ["test/cpp/api/jit.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "memory_test", + srcs = ["test/cpp/api/memory.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "misc_test", + srcs = ["test/cpp/api/misc.cpp"], + tags = [ + "exclusive", + "gpu-required", + ], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "module_test", + srcs = ["test/cpp/api/module.cpp"], + tags = [ + "exclusive", + "gpu-required", + ], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "modulelist_test", + srcs = ["test/cpp/api/modulelist.cpp"], + deps = [ + ":test_support", + 
":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "modules_test", + srcs = ["test/cpp/api/modules.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "nn_utils_test", + srcs = ["test/cpp/api/nn_utils.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "optim_test", + srcs = [ + "test/cpp/api/optim.cpp", + "test/cpp/api/optim_baseline.h", + ], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "ordered_dict_test", + srcs = ["test/cpp/api/ordered_dict.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "parallel_test", + srcs = ["test/cpp/api/parallel.cpp"], + copts = COMMON_COPTS, + tags = [ + "exclusive", + "gpu-required", + ], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "rnn_test", + size = "small", + srcs = ["test/cpp/api/rnn.cpp"], + tags = [ + "exclusive", + "gpu-required", + ], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "sequential_test", + size = "small", + srcs = ["test/cpp/api/sequential.cpp"], + copts = ["-Wno-deprecated-declarations"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "serialize_test", + size = "small", + srcs = ["test/cpp/api/serialize.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "static_test", + size = "small", + srcs = ["test/cpp/api/static.cpp"], + deps = [ + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "tensor_test", + size = "small", + srcs = ["test/cpp/api/tensor.cpp"], + copts = ["-Wno-deprecated-declarations"], + deps = [ + ":test_support", 
+ ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "tensor_cuda_test", + size = "small", + srcs = ["test/cpp/api/tensor_cuda.cpp"], + tags = [ + "exclusive", + "gpu-required", + ], + deps = [ + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "tensor_options_test", + size = "small", + srcs = ["test/cpp/api/tensor_options.cpp"], + deps = [ + ":test_support", + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "tensor_options_cuda_test", + size = "small", + srcs = ["test/cpp/api/tensor_options_cuda.cpp"], + tags = [ + "exclusive", + "gpu-required", + ], + deps = [ + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "torch_include_test", + size = "small", + srcs = ["test/cpp/api/torch_include.cpp"], + deps = [ + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +test_suite( + name = "api_tests", + tests = [ + "any_test", + "autograd_test", + "dataloader_test", + "enum_test", + "expanding_array_test", + "functional_test", + "init_test", + "integration_test", + "jit_test", + "memory_test", + "misc_test", + "module_test", + "modulelist_test", + "modules_test", + "nn_utils_test", + "optim_test", + "ordered_dict_test", + "rnn_test", + "sequential_test", + "serialize_test", + "static_test", + "tensor_options_test", + "tensor_test", + "torch_include_test", + ], +) + +# dist autograd tests +cc_test( + name = "torch_dist_autograd_test", + size = "small", + srcs = ["test/cpp/dist_autograd/test_dist_autograd.cpp"], + tags = [ + "exclusive", + "gpu-required", + ], + deps = [ + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +# jit tests +# Because these individual unit tests require custom registration, +# it is easier to mimic the cmake build by globbing them together into a single test.
+cc_test( + name = "jit_tests", + size = "small", + srcs = glob([ + "test/cpp/jit/*.cpp", + "test/cpp/jit/*.h", + "test/cpp/tensorexpr/*.cpp", + "test/cpp/tensorexpr/*.h", + ]), + linkstatic = True, + tags = [ + "exclusive", + "gpu-required", + ], + deps = [ + ":torch", + "@com_google_googletest//:gtest_main", + ], +) + +# all tests +test_suite( + name = "all_tests", + tests = [ + "api_tests", + "c10_tests", + "jit_tests", + "torch_dist_autograd_test", + ], +) diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 00000000000..53696dc87f3 --- /dev/null +++ b/WORKSPACE @@ -0,0 +1,160 @@ +workspace(name = "pytorch") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("//tools/rules:workspace.bzl", "new_patched_local_repository") + +http_archive( + name = "bazel_skylib", + urls = [ + "https://github.com/bazelbuild/bazel-skylib/releases/download/1.0.2/bazel-skylib-1.0.2.tar.gz", + ], +) + +http_archive( + name = "com_google_googletest", + strip_prefix = "googletest-cd6b9ae3243985d4dc725abd513a874ab4161f3e", + urls = [ + "https://github.com/google/googletest/archive/cd6b9ae3243985d4dc725abd513a874ab4161f3e.tar.gz", + ], +) + +http_archive( + name = "pybind11_bazel", + strip_prefix = "pybind11_bazel-7f397b5d2cc2434bbd651e096548f7b40c128044", + urls = ["https://github.com/pybind/pybind11_bazel/archive/7f397b5d2cc2434bbd651e096548f7b40c128044.zip"], + sha256 = "e4a9536f49d4a88e3c5a09954de49c4a18d6b1632c457a62d6ec4878c27f1b5b", +) + +new_local_repository( + name = "pybind11", + build_file = "@pybind11_bazel//:pybind11.BUILD", + path = "third_party/pybind11", +) + +http_archive( + name = "com_github_glog", + strip_prefix = "glog-0.4.0", + urls = [ + "https://github.com/google/glog/archive/v0.4.0.tar.gz", + ], +) + +http_archive( + name = "com_github_gflags_gflags", + strip_prefix = "gflags-2.2.2", + urls = [ + "https://github.com/gflags/gflags/archive/v2.2.2.tar.gz", + ], + sha256 = 
"34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf", +) + +new_local_repository( + name = "gloo", + build_file = "//third_party:gloo.BUILD", + path = "third_party/gloo", +) + +new_local_repository( + name = "onnx", + build_file = "//third_party:onnx.BUILD", + path = "third_party/onnx", +) + +new_local_repository( + name = "foxi", + build_file = "//third_party:foxi.BUILD", + path = "third_party/foxi", +) + +local_repository( + name = "com_google_protobuf", + path = "third_party/protobuf", +) + +new_local_repository( + name = "eigen", + build_file = "//third_party:eigen.BUILD", + path = "third_party/eigen", +) + +new_local_repository( + name = "fbgemm", + build_file = "//third_party:fbgemm.BUILD", + path = "third_party/fbgemm", +) + +new_local_repository( + name = "ideep", + build_file = "//third_party:ideep.BUILD", + path = "third_party/ideep", +) + +new_local_repository( + name = "mkl_dnn", + build_file = "//third_party:mkl-dnn.BUILD", + path = "third_party/ideep/mkl-dnn", +) + +new_local_repository( + name = "cpuinfo", + build_file = "//third_party:cpuinfo.BUILD", + path = "third_party/cpuinfo", +) + +new_local_repository( + name = "asmjit", + build_file = "//third_party:asmjit.BUILD", + path = "third_party/fbgemm/third_party/asmjit", +) + +new_local_repository( + name = "sleef", + build_file = "//third_party:sleef.BUILD", + path = "third_party/sleef", +) + +new_patched_local_repository( + name = "tbb", + patches = [ + "@//third_party:tbb.patch", + ], + patch_strip = 1, + build_file = "//third_party:tbb.BUILD", + path = "third_party/tbb", +) + +http_archive( + name = "mkl", + build_file = "//third_party:mkl.BUILD", + strip_prefix = "lib", + sha256 = "59154b30dd74561e90d547f9a3af26c75b6f4546210888f09c9d4db8f4bf9d4c", + urls = [ + "https://anaconda.org/anaconda/mkl/2020.0/download/linux-64/mkl-2020.0-166.tar.bz2", + ], +) + +http_archive( + name = "mkl_headers", + build_file = "//third_party:mkl_headers.BUILD", + sha256 = 
"2af3494a4bebe5ddccfdc43bacc80fcd78d14c1954b81d2c8e3d73b55527af90", + urls = [ + "https://anaconda.org/anaconda/mkl-include/2020.0/download/linux-64/mkl-include-2020.0-166.tar.bz2", + ], +) + +http_archive( + name = "rules_python", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.0.1/rules_python-0.0.1.tar.gz", + sha256 = "aa96a691d3a8177f3215b14b0edc9641787abaaa30363a080165d06ab65e1161", +) + +load("@pybind11_bazel//:python_configure.bzl", "python_configure") +python_configure(name = "local_config_python") + +load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") + +protobuf_deps() + +load("@rules_python//python:repositories.bzl", "py_repositories") + +py_repositories() diff --git a/aten.bzl b/aten.bzl new file mode 100644 index 00000000000..e04944e96ea --- /dev/null +++ b/aten.bzl @@ -0,0 +1,37 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + +CPU_CAPABILITY_NAMES = ["DEFAULT", "AVX", "AVX2"] +PREFIX = "aten/src/ATen/native/" + +def intern_build_aten_ops(copts, deps): + for cpu_capability in CPU_CAPABILITY_NAMES: + srcs = [] + for impl in native.glob( + [ + PREFIX + "cpu/*.cpp", + PREFIX + "quantized/cpu/kernels/*.cpp", + ]): + name = impl.replace(PREFIX, "") + out = PREFIX + name + "." 
+ cpu_capability + ".cpp" + native.genrule( + name = name + "_" + cpu_capability + "_cp", + srcs = [impl], + outs = [out], + cmd = "cp $< $@", + ) + srcs.append(out) + + cc_library( + name = "ATen_CPU_" + cpu_capability, + srcs = srcs, + copts = copts + [ + "-DCPU_CAPABILITY=" + cpu_capability, + "-DCPU_CAPABILITY_" + cpu_capability, + ], + deps = deps, + ) + cc_library( + name = "ATen_CPU", + srcs = ["ATen_CPU_" + cpu_capability for cpu_capability in CPU_CAPABILITY_NAMES], + linkstatic = 1, + ) diff --git a/third_party/BUILD b/third_party/BUILD new file mode 100644 index 00000000000..e69de29bb2d diff --git a/third_party/asmjit.BUILD b/third_party/asmjit.BUILD new file mode 100644 index 00000000000..13fadb2e871 --- /dev/null +++ b/third_party/asmjit.BUILD @@ -0,0 +1,28 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + +cc_library( + name = "asmjit", + srcs = glob([ + "src/asmjit/core/*.cpp", + "src/asmjit/x86/*.cpp", + ]), + hdrs = glob([ + "src/asmjit/x86/*.h", + "src/asmjit/core/*.h", + "src/asmjit/*.h", + ]), + copts = [ + "-DASMJIT_STATIC", + "-fno-tree-vectorize", + "-fmerge-all-constants", + # Keep a single -std flag here: when several are passed, the compiler uses the last one. + "-std=gnu++11", + "-DTH_BLAS_MKL", + ], + includes = [ + "asmjit/", + "src/", + ], + linkstatic = True, + visibility = ["//visibility:public"], +) diff --git a/third_party/cpuinfo.BUILD b/third_party/cpuinfo.BUILD new file mode 100644 index 00000000000..c44895034ee --- /dev/null +++ b/third_party/cpuinfo.BUILD @@ -0,0 +1,56 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + +cc_library( + name = "clog", + srcs = [ + "deps/clog/src/clog.c", + ], + hdrs = glob([ + "deps/clog/include/*.h", + ]), + includes = [ + "deps/clog/include/", + ], + linkstatic = True, + visibility = ["//visibility:public"], +) + +cc_library( + name = "cpuinfo", + srcs = glob( + [ + "src/*.c", + "src/linux/*.c", + "src/x86/*.c", + "src/x86/cache/*.c", + "src/x86/linux/*.c", + ], + exclude = [ + "src/x86/mockcpuid.c", + "src/linux/mockfile.c", + ], + ), + hdrs = glob([ + "include/*.h",
+ "src/*.h", + "src/cpuinfo/*.h", + "src/include/*.h", + "src/x86/*.h", + "src/x86/linux/*.h", + "src/linux/*.h", + ]), + copts = [ + "-DCPUINFO_LOG_LEVEL=2", + "-DTH_BLAS_MKL", + "-D_GNU_SOURCE=1", + ], + includes = [ + "include", + "src", + ], + linkstatic = True, + visibility = ["//visibility:public"], + deps = [ + ":clog", + ], +) diff --git a/third_party/eigen.BUILD b/third_party/eigen.BUILD new file mode 100644 index 00000000000..a6a73536063 --- /dev/null +++ b/third_party/eigen.BUILD @@ -0,0 +1,91 @@ +# This BUILD file is derived from https://github.com/tensorflow/tensorflow/blob/master/third_party/eigen.BUILD + +# Description: +# Eigen is a C++ template library for linear algebra: vectors, +# matrices, and related algorithms. + +load("@rules_cc//cc:defs.bzl", "cc_library") + +licenses([ + # Note: Eigen is an MPL2 library that includes GPL v3 and LGPL v2.1+ code. + # We've taken special care to not reference any restricted code. + "reciprocal", # MPL2 + "notice", # Portions BSD +]) + +exports_files(["COPYING.MPL2"]) + +# License-restricted (i.e. not reciprocal or notice) files inside Eigen/... +EIGEN_RESTRICTED_FILES = [ + "Eigen/src/OrderingMethods/Amd.h", + "Eigen/src/SparseCholesky/**", +] + +# Notable transitive dependencies of restricted files inside Eigen/...
+EIGEN_RESTRICTED_DEPS = [ + "Eigen/Eigen", + "Eigen/IterativeLinearSolvers", + "Eigen/MetisSupport", + "Eigen/Sparse", + "Eigen/SparseCholesky", + "Eigen/SparseLU", +] + +EIGEN_FILES = [ + "Eigen/**", + "unsupported/Eigen/CXX11/**", + "unsupported/Eigen/FFT", + "unsupported/Eigen/KroneckerProduct", + "unsupported/Eigen/src/FFT/**", + "unsupported/Eigen/src/KroneckerProduct/**", + "unsupported/Eigen/MatrixFunctions", + "unsupported/Eigen/SpecialFunctions", + "unsupported/Eigen/Splines", + "unsupported/Eigen/src/MatrixFunctions/**", + "unsupported/Eigen/src/SpecialFunctions/**", + "unsupported/Eigen/src/Splines/**", + "unsupported/Eigen/NonLinearOptimization", + "unsupported/Eigen/NumericalDiff", + "unsupported/Eigen/src/**", + "unsupported/Eigen/Polynomials", +] + +# List of files picked up by glob but actually part of another target. +EIGEN_EXCLUDE_FILES = ["Eigen/src/Core/arch/AVX/PacketMathGoogleTest.cc"] + +# Disallowed eigen modules/files in rNA: +# * Using the custom STL and memory support, it is not needed and should +# not be used with c++17. +# * We will only support the EulerAnglesZYX provided by //atg/geometry so +# just don't allow people to access the unsupported eigen module. +EIGEN_DISALLOW_FILES = [ + "Eigen/StlSupport/*.h", + "unsupported/Eigen/EulerAngles", + "unsupported/Eigen/src/EulerAngles/**", +] + +# Files known to be under MPL2 license. +EIGEN_MPL2_HEADER_FILES = glob( + EIGEN_FILES, + exclude = EIGEN_EXCLUDE_FILES + + EIGEN_RESTRICTED_FILES + + EIGEN_DISALLOW_FILES + + EIGEN_RESTRICTED_DEPS + [ + # Guarantees any file missed by excludes above will not compile. + "Eigen/src/Core/util/NonMPL2.h", + "Eigen/**/CMakeLists.txt", + ], +) + +cc_library( + name = "eigen", + hdrs = EIGEN_MPL2_HEADER_FILES, + defines = [ + # This define (mostly) guarantees we don't link any problematic + # code. We use it, but we do not rely on it, as evidenced above. 
+ "EIGEN_MPL2_ONLY", + "EIGEN_MAX_ALIGN_BYTES=64", + ], + includes = ["."], + visibility = ["//visibility:public"], +) diff --git a/third_party/fbgemm.BUILD b/third_party/fbgemm.BUILD new file mode 100644 index 00000000000..1769179baee --- /dev/null +++ b/third_party/fbgemm.BUILD @@ -0,0 +1,221 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + +cc_library( + name = "fbgemm_src_headers", + hdrs = [ + "src/RefImplementations.h", + ], + include_prefix = "fbgemm", +) + +cc_library( + name = "fbgemm_base", + srcs = [ + "src/EmbeddingSpMDM.cc", + "src/EmbeddingSpMDMNBit.cc", + "src/ExecuteKernel.cc", + "src/ExecuteKernelU8S8.cc", + "src/Fbgemm.cc", + "src/FbgemmBfloat16Convert.cc", + "src/FbgemmConv.cc", + "src/FbgemmFP16.cc", + "src/FbgemmFloat16Convert.cc", + "src/FbgemmI64.cc", + "src/FbgemmI8Spmdm.cc", + "src/GenerateKernelU8S8S32ACC16.cc", + "src/GenerateKernelU8S8S32ACC16Avx512.cc", + "src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc", + "src/GenerateKernelU8S8S32ACC32.cc", + "src/GenerateKernelU8S8S32ACC32Avx512.cc", + "src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc", + "src/GroupwiseConvAcc32Avx2.cc", + "src/PackAMatrix.cc", + "src/PackAWithIm2Col.cc", + "src/PackBMatrix.cc", + "src/PackMatrix.cc", + "src/PackAWithQuantRowOffset.cc", + "src/PackAWithRowOffset.cc", + "src/PackWeightMatrixForGConv.cc", + "src/PackWeightsForConv.cc", + "src/QuantUtils.cc", + "src/RefImplementations.cc", + "src/RowWiseSparseAdagradFused.cc", + "src/SparseAdagrad.cc", + "src/Utils.cc", + # Private headers + "src/CodeCache.h", + "src/CodeGenHelpers.h", + "src/ExecuteKernel.h", + "src/ExecuteKernelGeneric.h", + "src/ExecuteKernelU8S8.h", + "src/FbgemmFP16Common.h", + "src/GenerateKernel.h", + "src/GroupwiseConv.h", + "src/RefImplementations.h", + "src/TransposeUtils.h", + ], + hdrs = [ + "include/fbgemm/FbgemmConvert.h", + "include/fbgemm/FbgemmI64.h", + ], + includes = [ + ".", + "src", + ], + deps = [ + ":fbgemm_avx2", + ":fbgemm_avx512", + ":fbgemm_headers", + ":fbgemm_src_headers", + 
"@asmjit", + "@cpuinfo", + ], + linkstatic = 1, +) + +cc_library( + name = "fbgemm_avx2_circular", + srcs = [ + "src/FbgemmFloat16ConvertAvx2.cc", + ], + copts = [ + "-mavx2", + "-mf16c", + ], + deps = [ + ":fbgemm_base", + ], + linkstatic = 1, +) + +cc_library( + name = "fbgemm", + visibility = ["//visibility:public"], + deps = [ + ":fbgemm_base", + ":fbgemm_avx2_circular", + ], + linkstatic = 1, +) + +cc_library( + name = "fbgemm_avx2", + srcs = [ + "src/EmbeddingSpMDMAvx2.cc", + "src/FbgemmBfloat16ConvertAvx2.cc", + # "src/FbgemmFloat16ConvertAvx2.cc", + "src/FbgemmI8Depthwise3DAvx2.cc", + "src/FbgemmI8Depthwise3x3Avx2.cc", + "src/FbgemmI8DepthwiseAvx2.cc", + "src/FbgemmI8DepthwisePerChannelQuantAvx2.cc", + "src/OptimizedKernelsAvx2.cc", + "src/PackDepthwiseConvMatrixAvx2.cc", + "src/QuantUtilsAvx2.cc", + "src/UtilsAvx2.cc", + # Inline Assembly sources + "src/FbgemmFP16UKernelsAvx2.cc", + # Private headers + "src/FbgemmFP16Common.h", + "src/FbgemmFP16UKernelsAvx2.h", + "src/FbgemmI8Depthwise2DAvx2-inl.h", + "src/FbgemmI8DepthwiseAvx2-inl.h", + "src/MaskAvx2.h", + "src/OptimizedKernelsAvx2.h", + "src/TransposeUtils.h", + "src/TransposeUtilsAvx2.h", + ], + copts = [ + "-m64", + "-mavx2", + "-mfma", + "-mf16c", + "-masm=intel", + ], + deps = [ + ":fbgemm_headers", + ], + linkstatic = 1, +) + +cc_library( + name = "fbgemm_avx2_headers", + includes = [ + "src", + ], + hdrs = [ + "src/FbgemmFP16UKernelsAvx2.h", + "src/MaskAvx2.h", + "src/OptimizedKernelsAvx2.h", + ], +) + +cc_library( + name = "fbgemm_avx512", + srcs = [ + "src/FbgemmBfloat16ConvertAvx512.cc", + "src/FbgemmFloat16ConvertAvx512.cc", + "src/UtilsAvx512.cc", + # Inline Assembly sources + "src/FbgemmFP16UKernelsAvx512.cc", + "src/FbgemmFP16UKernelsAvx512_256.cc", + # Private headers + "src/FbgemmFP16UKernelsAvx512.h", + "src/FbgemmFP16Common.h", + "src/MaskAvx2.h", + "src/TransposeUtils.h", + "src/TransposeUtilsAvx2.h", + ], + hdrs = [ + "src/FbgemmFP16UKernelsAvx512_256.h", + ], + copts = [ + "-m64", + 
"-mfma", + "-mavx512f", + "-mavx512bw", + "-mavx512dq", + "-mavx512vl", + "-masm=intel", + ], + deps = [ + ":fbgemm_headers", + ], + linkstatic = 1, +) + +cc_library( + name = "fbgemm_avx512_headers", + includes = [ + "src", + ], + hdrs = [ + "src/FbgemmFP16UKernelsAvx512.h", + "src/FbgemmFP16UKernelsAvx512_256.h", + ], +) + +cc_library( + name = "fbgemm_headers", + hdrs = [ + "include/fbgemm/ConvUtils.h", + "include/fbgemm/Fbgemm.h", + "include/fbgemm/FbgemmBuild.h", + "include/fbgemm/FbgemmConvert.h", + "include/fbgemm/FbgemmEmbedding.h", + "include/fbgemm/FbgemmFP16.h", + "include/fbgemm/FbgemmI64.h", + "include/fbgemm/FbgemmI8DepthwiseAvx2.h", + "include/fbgemm/FbgemmI8Spmdm.h", + "include/fbgemm/OutputProcessing-inl.h", + "include/fbgemm/PackingTraits-inl.h", + "include/fbgemm/QuantUtils.h", + "include/fbgemm/QuantUtilsAvx2.h", + "include/fbgemm/Types.h", + "include/fbgemm/Utils.h", + "include/fbgemm/UtilsAvx2.h", + ], + includes = [ + "include", + ], + visibility = ["//visibility:public"], +) diff --git a/third_party/foxi.BUILD b/third_party/foxi.BUILD new file mode 100644 index 00000000000..6e19fa004bb --- /dev/null +++ b/third_party/foxi.BUILD @@ -0,0 +1,16 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + +cc_library( + name = "foxi", + srcs = [ + "foxi/onnxifi_loader.c", + ], + hdrs = glob([ + "foxi/*.h", + ]), + includes = [ + ".", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) diff --git a/third_party/gloo.BUILD b/third_party/gloo.BUILD new file mode 100644 index 00000000000..e73b97738cf --- /dev/null +++ b/third_party/gloo.BUILD @@ -0,0 +1,85 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") +load("@//tools/rules:cu.bzl", "cu_library") +load("@//third_party:substitution.bzl", "template_rule") +load("@//tools/config:defs.bzl", "if_cuda") + +template_rule( + name = "gloo_config_cmake_macros", + src = "gloo/config.h.in", + out = "gloo/config.h", + substitutions = { + "@GLOO_VERSION_MAJOR@": "0", + "@GLOO_VERSION_MINOR@": "5", + 
"@GLOO_VERSION_PATCH@": "0", + "cmakedefine01 GLOO_USE_CUDA": "define GLOO_USE_CUDA 1", + "cmakedefine01 GLOO_USE_NCCL": "define GLOO_USE_NCCL 0", + "cmakedefine01 GLOO_USE_ROCM": "define GLOO_USE_ROCM 0", + "cmakedefine01 GLOO_USE_RCCL": "define GLOO_USE_RCCL 0", + "cmakedefine01 GLOO_USE_REDIS": "define GLOO_USE_REDIS 0", + "cmakedefine01 GLOO_USE_IBVERBS": "define GLOO_USE_IBVERBS 0", + "cmakedefine01 GLOO_USE_MPI": "define GLOO_USE_MPI 0", + "cmakedefine01 GLOO_USE_AVX": "define GLOO_USE_AVX 0", + "cmakedefine01 GLOO_USE_LIBUV": "define GLOO_USE_LIBUV 0", + "cmakedefine01 GLOO_HAVE_TRANSPORT_TCP": "define GLOO_HAVE_TRANSPORT_TCP 1", + "cmakedefine01 GLOO_HAVE_TRANSPORT_IBVERBS": "define GLOO_HAVE_TRANSPORT_IBVERBS 0", + "cmakedefine01 GLOO_HAVE_TRANSPORT_UV": "define GLOO_HAVE_TRANSPORT_UV 0", + }, +) + +cc_library( + name = "gloo_headers", + hdrs = glob( + [ + "gloo/*.h", + "gloo/common/*.h", + "gloo/rendezvous/*.h", + "gloo/transport/*.h", + "gloo/transport/tcp/*.h", + ], + exclude = [ + "gloo/rendezvous/redis_store.h", + ], + ) + ["gloo/config.h"], + includes = [ + ".", + ], +) + +cu_library( + name = "gloo_cuda", + srcs = [ + "gloo/cuda.cu.cc", + "gloo/cuda_private.cu.cc", + ], + visibility = ["//visibility:public"], + deps = [ + ":gloo_headers", + ], + alwayslink = True, +) + +cc_library( + name = "gloo", + srcs = glob( + [ + "gloo/*.cc", + "gloo/common/*.cc", + "gloo/rendezvous/*.cc", + "gloo/transport/*.cc", + "gloo/transport/tcp/*.cc", + ], + exclude = [ + "gloo/cuda*.cc", + "gloo/rendezvous/redis_store.cc", + ], + ), + copts = [ + "-std=gnu++11", + "-std=c++11", + ], + visibility = ["//visibility:public"], + deps = [":gloo_headers"] + if_cuda( + [":gloo_cuda"], + [], + ), +) diff --git a/third_party/ideep.BUILD b/third_party/ideep.BUILD new file mode 100644 index 00000000000..882d5cb342a --- /dev/null +++ b/third_party/ideep.BUILD @@ -0,0 +1,17 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + +cc_library( + name = "ideep", + hdrs = glob([ + 
"include/**/*.hpp", + "include/**/*.h", + ]), + defines = [ + "IDEEP_USE_MKL", + ], + includes = [ + "include/", + ], + visibility = ["//visibility:public"], + deps = ["@mkl_dnn//:mkl-dnn"], +) diff --git a/third_party/miniz-2.0.8/BUILD.bazel b/third_party/miniz-2.0.8/BUILD.bazel new file mode 100644 index 00000000000..c105e73ac2f --- /dev/null +++ b/third_party/miniz-2.0.8/BUILD.bazel @@ -0,0 +1,10 @@ +cc_library( + name = "miniz", + srcs = [ + "miniz.c", + ], + hdrs = [ + "miniz.h", + ], + visibility = ["//visibility:public"], +) diff --git a/third_party/mkl-dnn.BUILD b/third_party/mkl-dnn.BUILD new file mode 100644 index 00000000000..fdb887c9cac --- /dev/null +++ b/third_party/mkl-dnn.BUILD @@ -0,0 +1,89 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") +load("@//third_party:substitution.bzl", "template_rule") + +template_rule( + name = "include_dnnl_version", + src = "include/dnnl_version.h.in", + out = "include/dnnl_version.h", + substitutions = { + "@DNNL_VERSION_MAJOR@": "1", + "@DNNL_VERSION_MINOR@": "2", + "@DNNL_VERSION_PATCH@": "0", + "@DNNL_VERSION_HASH@": "70f8b879ea7a0c38caedb3320b7c85e8497ff50d", + }, +) + +template_rule( + name = "include_dnnl_config", + src = "include/dnnl_config.h.in", + out = "include/dnnl_config.h", + substitutions = { + "cmakedefine": "define", + "${DNNL_CPU_THREADING_RUNTIME}": "OMP", + "${DNNL_CPU_RUNTIME}": "OMP", + "${DNNL_GPU_RUNTIME}": "NONE", + }, +) + +cc_library( + name = "mkl-dnn", + srcs = glob([ + "src/common/*.cpp", + "src/cpu/*.cpp", + "src/cpu/binary/*.cpp", + "src/cpu/gemm/*.cpp", + "src/cpu/gemm/bf16/*.cpp", + "src/cpu/gemm/f32/*.cpp", + "src/cpu/gemm/s8x8s32/*.cpp", + "src/cpu/jit_utils/*.cpp", + "src/cpu/jit_utils/jitprofiling/*.c", + "src/cpu/jit_utils/linux_perf/*.cpp", + "src/cpu/matmul/*.cpp", + "src/cpu/resampling/*.cpp", + "src/cpu/rnn/*.cpp", + ]), + hdrs = glob([ + "include/*.h", + "include/*.hpp", + "src/*.hpp", + "src/cpu/**/*.hpp", + "src/cpu/**/*.h", + "src/common/*.hpp", + "src/cpu/rnn/*.hpp", + 
]) + [ + "include/dnnl_version.h", + "include/dnnl_config.h", + ], + copts = [ + "-DUSE_AVX", + "-DUSE_AVX2", + "-DDNNL_DLL", + "-DDNNL_DLL_EXPORTS", + "-DDNNL_ENABLE_CONCURRENT_EXEC", + "-DTH_BLAS_MKL", + "-D__STDC_CONSTANT_MACROS", + "-D__STDC_LIMIT_MACROS", + "-fno-strict-overflow", + "-fopenmp", + ] + select({ + "@//tools/config:thread_sanitizer": ["-DMKLDNN_THR=0"], + "//conditions:default": ["-DMKLDNN_THR=2"], + }), + includes = [ + "include/", + "src/", + "src/common/", + "src/cpu/", + "src/cpu/xbyak/", + ], + visibility = ["//visibility:public"], + linkopts = [ + "-lgomp", + ], + deps = [ + "@mkl", + ] + select({ + "@//tools/config:thread_sanitizer": [], + "//conditions:default": ["@tbb"], + }), +) diff --git a/third_party/mkl.BUILD b/third_party/mkl.BUILD new file mode 100644 index 00000000000..bc868b24e83 --- /dev/null +++ b/third_party/mkl.BUILD @@ -0,0 +1,21 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + +cc_library( + name = "mkl", + srcs = [ + "libmkl_avx2.so", + "libmkl_core.so", + "libmkl_def.so", + "libmkl_intel_lp64.so", + "libmkl_rt.so", + "libmkl_sequential.so", + "libmkl_vml_avx2.so", + "libmkl_vml_avx512.so", + "libmkl_vml_def.so", + ] + select({ + "@//tools/config:thread_sanitizer": [], + "//conditions:default": ["libmkl_tbb_thread.so"], + }), + visibility = ["//visibility:public"], + deps = ["@mkl_headers"], +) diff --git a/third_party/mkl_headers.BUILD b/third_party/mkl_headers.BUILD new file mode 100644 index 00000000000..965801c91aa --- /dev/null +++ b/third_party/mkl_headers.BUILD @@ -0,0 +1,8 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + +cc_library( + name = "mkl_headers", + hdrs = glob(["include/*.h"]), + includes = ["include/"], + visibility = ["//visibility:public"], +) diff --git a/third_party/onnx.BUILD b/third_party/onnx.BUILD new file mode 100644 index 00000000000..b8809deb4fe --- /dev/null +++ b/third_party/onnx.BUILD @@ -0,0 +1,113 @@ +load("@rules_proto//proto:defs.bzl", "proto_library") 
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_proto_library") +load("@rules_python//python:defs.bzl", "py_binary") + +py_binary( + name = "gen_proto", + srcs = ["onnx/gen_proto.py"], + data = [ + "onnx/onnx.in.proto", + "onnx/onnx-operators.in.proto", + ], +) + +genrule( + name = "generate_onnx_proto", + outs = [ + "onnx/onnx_onnx_torch-ml.proto", + "onnx/onnx-ml.pb.h", + ], + cmd = "$(location :gen_proto) -p onnx_torch -o $(@D)/onnx onnx -m >/dev/null && sed -i 's/onnx_onnx_torch-ml.pb.h/onnx\\/onnx_onnx_torch-ml.pb.h/g' $(@D)/onnx/onnx-ml.pb.h", + tools = [":gen_proto"], +) + +genrule( + name = "generate_onnx_operators_proto", + outs = [ + "onnx/onnx-operators_onnx_torch-ml.proto", + "onnx/onnx-operators-ml.pb.h", + ], + cmd = "$(location :gen_proto) -p onnx_torch -o $(@D)/onnx onnx-operators -m >/dev/null && sed -i 's/onnx-operators_onnx_torch-ml.pb.h/onnx\\/onnx-operators_onnx_torch-ml.pb.h/g' $(@D)/onnx/onnx-operators-ml.pb.h", + tools = [":gen_proto"], +) + +cc_library( + name = "onnx", + srcs = glob( + [ + "onnx/*.cc", + "onnx/common/*.cc", + "onnx/defs/*.cc", + "onnx/defs/controlflow/*.cc", + "onnx/defs/experiments/*.cc", + "onnx/defs/generator/*.cc", + "onnx/defs/logical/*.cc", + "onnx/defs/math/*.cc", + "onnx/defs/nn/*.cc", + "onnx/defs/object_detection/*.cc", + "onnx/defs/quantization/*.cc", + "onnx/defs/reduction/*.cc", + "onnx/defs/rnn/*.cc", + "onnx/defs/sequence/*.cc", + "onnx/defs/tensor/*.cc", + "onnx/defs/traditionalml/*.cc", + "onnx/defs/traditionalml/*.cc", + "onnx/optimizer/*.cc", + "onnx/shape_inference/*.cc", + "onnx/version_converter/*.cc", + ], + exclude = [ + "onnx/cpp2py_export.cc", + ], + ), + hdrs = glob([ + "onnx/*.h", + "onnx/version_converter/*.h", + "onnx/common/*.h", + "onnx/defs/*.h", + "onnx/defs/tensor/*.h", + "onnx/shape_inference/*.h", + "onnx/optimizer/*.h", + "onnx/optimizer/passes/*.h", + "onnx/version_converter/adapters/*.h", + ]) + [ + "onnx/onnx-ml.pb.h", + "onnx/onnx-operators-ml.pb.h", + ], + defines = [ + 
"ONNX_ML=1", + "ONNX_NAMESPACE=onnx_torch", + ], + includes = [ + ".", + "onnx/", + ], + visibility = ["//visibility:public"], + deps = [ + ":onnx_proto_lib", + ], +) + +cc_library( + name = "onnx_proto_headers", + hdrs = glob([ + "onnx/*_pb.h", + ]), + visibility = ["//visibility:public"], + deps = [ + ":onnx_proto_lib", + ], +) + +proto_library( + name = "onnx_proto", + srcs = [ + "onnx/onnx-operators_onnx_torch-ml.proto", + "onnx/onnx_onnx_torch-ml.proto", + ], +) + +cc_proto_library( + name = "onnx_proto_lib", + deps = [":onnx_proto"], +) diff --git a/third_party/sleef.BUILD b/third_party/sleef.BUILD new file mode 100644 index 00000000000..6b7349e6696 --- /dev/null +++ b/third_party/sleef.BUILD @@ -0,0 +1,494 @@ +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") +load("@//third_party:sleef.bzl", "sleef_cc_library") + +SLEEF_COPTS = [ + "-DHAVE_MALLOC_USABLE_SIZE=1", + "-DHAVE_MMAP=1", + "-DHAVE_SHM_OPEN=1", + "-DHAVE_SHM_UNLINK=1", + "-DIDEEP_USE_MKL", + "-DMKLDNN_THR=MKLDNN_THR_TBB", + "-DONNX_ML=1", + "-DONNX_NAMESPACE=onnx", + "-DTH_BLAS_MKL", + "-D_FILE_OFFSET_BITS=64", + "-ffp-contract=off", + "-fno-math-errno", + "-fno-trapping-math", + "-DCAFFE2_USE_GLOO", + "-DCUDA_HAS_FP16=1", + "-DHAVE_GCC_GET_CPUID", + "-DUSE_AVX", + "-DUSE_AVX2", + "-DTH_HAVE_THREAD", + "-std=gnu99", +] + +SLEEF_COMMON_TARGET_COPTS = [ + "-DSLEEF_STATIC_LIBS=1", + "-DENABLE_ALIAS=1", +] + +SLEEF_PRIVATE_HEADERS = glob([ + "build/include/*.h", + "src/arch/*.h", + "src/common/*.h", + "src/libm/*.h", + "src/libm/include/*.h", +]) + +SLEEF_PUBLIC_HEADERS = [ + ":sleef_h", +] + +SLEEF_PRIVATE_INCLUDES = [ + "-Iexternal/sleef/src/arch", + "-Iexternal/sleef/src/common", +] + +SLEEF_PUBLIC_INCLUDES = [ + "build/include", +] + +SLEEF_VISIBILITY = [ + "@pytorch//:__subpackages__", +] + +cc_binary( + name = "mkalias", + srcs = [ + "src/libm/funcproto.h", + "src/libm/mkalias.c", + ], +) + +genrule( + name = "alias_avx512f_h", + outs = ["alias_avx512f.h"], + cmd = "{ " + "; ".join([ + 
"$(location :mkalias) -16 __m512 __m512i e avx512f", + "$(location :mkalias) 8 __m512d __m256i e avx512f", + ]) + "; } > $@", + tools = [":mkalias"], +) + +cc_binary( + name = "mkdisp", + srcs = [ + "src/libm/funcproto.h", + "src/libm/mkdisp.c", + ], + copts = SLEEF_COPTS, +) + +genrule( + name = "dispavx_c", + srcs = ["src/libm/dispavx.c.org"], + outs = ["dispavx.c"], + cmd = "{ cat $(location src/libm/dispavx.c.org); $(location :mkdisp) 4 8 __m256d __m256 __m128i avx fma4 avx2; } > $@", + tools = [":mkdisp"], +) + +genrule( + name = "dispsse_c", + srcs = ["src/libm/dispsse.c.org"], + outs = ["dispsse.c"], + cmd = "{ cat $(location src/libm/dispsse.c.org); $(location :mkdisp) 2 4 __m128d __m128 __m128i sse2 sse4 avx2128; } > $@", + tools = [":mkdisp"], +) + +cc_binary( + name = "mkrename", + srcs = [ + "src/libm/funcproto.h", + "src/libm/mkrename.c", + ], +) + +genrule( + name = "renameavx_h", + outs = ["renameavx.h"], + cmd = "$(location :mkrename) cinz_ 4 8 avx > $@", + tools = [":mkrename"], +) + +genrule( + name = "renameavx2_h", + outs = ["renameavx2.h"], + cmd = "$(location :mkrename) finz_ 4 8 avx2 > $@", + tools = [":mkrename"], +) + +genrule( + name = "renameavx2128_h", + outs = ["renameavx2128.h"], + cmd = "$(location :mkrename) finz_ 2 4 avx2128 > $@", + tools = [":mkrename"], +) + +genrule( + name = "renameavx512f_h", + outs = ["renameavx512f.h"], + cmd = "$(location :mkrename) finz_ 8 16 avx512f > $@", + tools = [":mkrename"], +) + +genrule( + name = "renameavx512fnofma_h", + outs = ["renameavx512fnofma.h"], + cmd = "$(location :mkrename) cinz_ 8 16 avx512fnofma > $@", + tools = [":mkrename"], +) + +genrule( + name = "renamefma4_h", + outs = ["renamefma4.h"], + cmd = "$(location :mkrename) finz_ 4 8 fma4 > $@", + tools = [":mkrename"], +) + +genrule( + name = "renamepurec_scalar_h", + outs = ["renamepurec_scalar.h"], + cmd = "$(location :mkrename) cinz_ 1 1 purec > $@", + tools = [":mkrename"], +) + +genrule( + name = "renamepurecfma_scalar_h", + outs 
= ["renamepurecfma_scalar.h"], + cmd = "$(location :mkrename) finz_ 1 1 purecfma > $@", + tools = [":mkrename"], +) + +genrule( + name = "renamesse2_h", + outs = ["renamesse2.h"], + cmd = "$(location :mkrename) cinz_ 2 4 sse2 > $@", + tools = [":mkrename"], +) + +genrule( + name = "renamesse4_h", + outs = ["renamesse4.h"], + cmd = "$(location :mkrename) cinz_ 2 4 sse4 > $@", + tools = [":mkrename"], +) + +genrule( + name = "sleef_h", + srcs = [ + "src/libm/sleeflibm_header.h.org", + "src/libm/sleeflibm_footer.h.org", + ], + outs = ["build/include/sleef.h"], + cmd = "{ " + "; ".join([ + "cat $(location src/libm/sleeflibm_header.h.org)", + "$(location :mkrename) cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__", + "$(location :mkrename) cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse2", + "$(location :mkrename) cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse4", + "$(location :mkrename) cinz_ 4 8 __m256d __m256 __m128i \"struct { __m128i x, y; }\" __AVX__", + "$(location :mkrename) cinz_ 4 8 __m256d __m256 __m128i \"struct { __m128i x, y; }\" __AVX__ avx", + "$(location :mkrename) finz_ 4 8 __m256d __m256 __m128i \"struct { __m128i x, y; }\" __AVX__ fma4", + "$(location :mkrename) finz_ 4 8 __m256d __m256 __m128i __m256i __AVX__ avx2", + "$(location :mkrename) finz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ avx2128", + "$(location :mkrename) finz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__", + "$(location :mkrename) finz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__ avx512f", + "$(location :mkrename) cinz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__ avx512fnofma", + "$(location :mkrename) cinz_ 1 1 double float int32_t int32_t __STDC__ purec", + "$(location :mkrename) finz_ 1 1 double float int32_t int32_t FP_FAST_FMA purecfma", + "cat $(location src/libm/sleeflibm_footer.h.org)", + ]) + "; } > $@", + tools = [":mkrename"], +) + +cc_library( + name = "sleef", + srcs = [ + "src/libm/rempitab.c", + "src/libm/sleefdp.c", + "src/libm/sleefld.c", 
+ "src/libm/sleefqp.c", + "src/libm/sleefsp.c", + ], + hdrs = SLEEF_PUBLIC_HEADERS, + copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [ + "-DDORENAME=1", + "-DENABLEFLOAT128=1", + "-Wno-unused-result", + ], + includes = SLEEF_PUBLIC_INCLUDES, + # -lgcc resolves + # U __addtf3 + # U __eqtf2 + # U __fixtfdi + # U __floatditf + # U __gttf2 + # U __lttf2 + # U __multf3 + # U __subtf3 + # in bazel-bin/external/sleef/_objs/sleef/sleefqp.pic.o + linkopts = [ + "-lgcc", + ], + linkstatic = True, + visibility = SLEEF_VISIBILITY, + # The purpose of the lists in deps is to keep related pairs of + # libraries together. In particular, each pair that contains a *det* + # library originates with a sleef_cc_library(). + deps = [ + ":common", + ":dispavx", + ":dispsse", + ] + [ + ":sleefavx", + ":sleefdetavx", + ] + [ + ":sleefavx2", + ":sleefdetavx2", + ] + [ + ":sleefavx2128", + ":sleefdetavx2128", + ] + [ + ":sleefavx512f", + ":sleefdetavx512f", + ] + [ + ":sleefavx512fnofma", + ":sleefdetavx512fnofma", + ] + [ + ":sleeffma4", + ":sleefdetfma4", + ] + [ + ":sleefsse2", + ":sleefdetsse2", + ] + [ + ":sleefsse4", + ":sleefdetsse4", + ] + [ + ":sleefpurec_scalar", + ":sleefdetpurec_scalar", + ] + [ + ":sleefpurecfma_scalar", + ":sleefdetpurecfma_scalar", + ], + alwayslink = True, +) + +cc_library( + name = "common", + srcs = SLEEF_PRIVATE_HEADERS + [ + "src/common/common.c", + ], + copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + [ + "-Wno-unused-result", + ], + linkstatic = True, + visibility = SLEEF_VISIBILITY, + alwayslink = True, +) + +cc_library( + name = "dispavx", + srcs = SLEEF_PRIVATE_HEADERS + SLEEF_PUBLIC_HEADERS + [ + ":dispavx_c", + ], + copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [ + "-DENABLE_AVX2=1", + "-DENABLE_FMA4=1", + "-mavx", + ], + includes = SLEEF_PUBLIC_INCLUDES, + linkstatic = True, + visibility = SLEEF_VISIBILITY, + alwayslink = True, +) + +cc_library( + name = "dispsse", + srcs = 
SLEEF_PRIVATE_HEADERS + SLEEF_PUBLIC_HEADERS + [ + ":dispsse_c", + ], + copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [ + "-DENABLE_AVX2=1", + "-DENABLE_FMA4=1", + "-msse2", + ], + includes = SLEEF_PUBLIC_INCLUDES, + linkstatic = True, + visibility = SLEEF_VISIBILITY, + alwayslink = True, +) + +sleef_cc_library( + name = "sleefavx512f", + srcs = SLEEF_PRIVATE_HEADERS + [ + "src/libm/sleefsimddp.c", + "src/libm/sleefsimdsp.c", + ":alias_avx512f_h", + ":renameavx512f_h", + ], + copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [ + "-DDORENAME=1", + "-DALIAS_NO_EXT_SUFFIX=\\\"alias_avx512f.h\\\"", + "-DENABLE_AVX512F=1", + "-mavx512f", + ], + linkstatic = True, + visibility = SLEEF_VISIBILITY, + alwayslink = True, +) + +sleef_cc_library( + name = "sleefavx512fnofma", + srcs = SLEEF_PRIVATE_HEADERS + [ + "src/libm/sleefsimddp.c", + "src/libm/sleefsimdsp.c", + ":renameavx512fnofma_h", + ], + copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [ + "-DDORENAME=1", + "-DENABLE_AVX512FNOFMA=1", + "-mavx512f", + ], + linkstatic = True, + visibility = SLEEF_VISIBILITY, + alwayslink = True, +) + +sleef_cc_library( + name = "sleefavx", + srcs = SLEEF_PRIVATE_HEADERS + [ + "src/libm/sleefsimddp.c", + "src/libm/sleefsimdsp.c", + ":renameavx_h", + ], + copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [ + "-DDORENAME=1", + "-DENABLE_AVX=1", + "-mavx", + ], + linkstatic = True, + visibility = SLEEF_VISIBILITY, + alwayslink = True, +) + +sleef_cc_library( + name = "sleefavx2", + srcs = SLEEF_PRIVATE_HEADERS + [ + "src/libm/sleefsimddp.c", + "src/libm/sleefsimdsp.c", + ":renameavx2_h", + ], + copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [ + "-DDORENAME=1", + "-DENABLE_AVX2=1", + "-mavx2", + "-mfma", + ], + linkstatic = True, + visibility = SLEEF_VISIBILITY, + alwayslink = True, +) + +sleef_cc_library( + name = "sleefavx2128", + srcs = 
SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renameavx2128_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_AVX2128=1",
+        "-mavx2",
+        "-mfma",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleeffma4",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renamefma4_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_FMA4=1",
+        "-mfma4",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefsse2",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renamesse2_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_SSE2=1",
+        "-msse2",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefsse4",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renamesse4_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_SSE4=1",
+        "-msse4.1",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefpurec_scalar",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renamepurec_scalar_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_PUREC_SCALAR=1",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefpurecfma_scalar",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renamepurecfma_scalar_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_PURECFMA_SCALAR=1",
+        "-mavx2",
+        "-mfma",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
diff --git a/third_party/sleef.bzl b/third_party/sleef.bzl
new file mode 100644
index 00000000000..6fb022397f8
--- /dev/null
+++ b/third_party/sleef.bzl
@@ -0,0 +1,27 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+# This macro provides for generating both "sleef" and
+# "sleefdet" libraries for a given set of code. The difference is
+# that the "det" libraries get compiled with "-DDETERMINISTIC=1".
+
+def sleef_cc_library(name, copts, **kwargs):
+    """Declares cc_library `name` plus a twin whose name starts with "sleefdet".
+
+    Args:
+        name: target name of the regular library; must start with "sleef".
+        copts: compiler options of the regular library; the twin gets the
+            same options followed by "-DDETERMINISTIC=1".
+        **kwargs: forwarded unchanged to both cc_library calls.
+    """
+    base = "sleef"
+
+    # Regular variant, exactly as the caller described it.
+    cc_library(name = name, copts = copts, **kwargs)
+
+    if not name.startswith(base):
+        fail("name {} does not start with {}".format(repr(name), repr(base)))
+
+    # The leading "sleef" becomes "sleefdet"; the rest of the name is kept.
+    det_name = base + "det" + name[len(base):]
+    det_copts = copts + ["-DDETERMINISTIC=1"]
+    cc_library(name = det_name, copts = det_copts, **kwargs)
diff --git a/third_party/substitution.bzl b/third_party/substitution.bzl
new file mode 100644
index 00000000000..bcc24cae708
--- /dev/null
+++ b/third_party/substitution.bzl
@@ -0,0 +1,44 @@
+# This Bazel rules file is derived from https://github.com/tensorflow/tensorflow/blob/master/third_party/common.bzl
+
+# Rule for simple expansion of template files. This performs a simple
+# search over the template file for the keys in substitutions,
+# and replaces them with the corresponding values.
+#
+# Typical usage:
+#   load("@//third_party:substitution.bzl", "template_rule")
+#   template_rule(
+#       name = "ExpandMyTemplate",
+#       src = "my.template",
+#       out = "my.txt",
+#       substitutions = {
+#         "$VAR1": "foo",
+#         "$VAR2": "bar",
+#       }
+#   )
+#
+# Args:
+#   name: The name of the rule.
+#   src: The template file to expand
+#   out: The destination of the expanded file
+#   substitutions: A dictionary mapping strings to their substitutions
+
+def template_rule_impl(ctx):  # Expands ctx.file.src into ctx.outputs.out using ctx.attr.substitutions.
+    ctx.actions.expand_template(
+        template = ctx.file.src,
+        output = ctx.outputs.out,
+        substitutions = ctx.attr.substitutions,
+    )
+
+template_rule = rule(
+    attrs = {
+        "src": attr.label(
+            mandatory = True,
+            allow_single_file = True,
+        ),
+        "out": attr.output(mandatory = True),
+        "substitutions": attr.string_dict(mandatory = True),
+    },
+    # output_to_genfiles is required for header files.
+    output_to_genfiles = True,
+    implementation = template_rule_impl,
+)
diff --git a/third_party/tbb.BUILD b/third_party/tbb.BUILD
new file mode 100644
index 00000000000..b7e18c92e5e
--- /dev/null
+++ b/third_party/tbb.BUILD
@@ -0,0 +1,75 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+load("@//third_party:substitution.bzl", "template_rule")
+
+licenses(["notice"])  # Apache 2.0
+
+template_rule(
+    name = "version_string",
+    src = "@//:aten/src/ATen/cpu/tbb/extra/version_string.ver.in",
+    out = "version_string.h",
+    substitutions = {
+        "@CMAKE_SYSTEM_NAME@": "Unknown",
+        "@CMAKE_SYSTEM@": "Unknown",
+        "@CMAKE_SYSTEM_VERSION@": "Unknown",
+        "@CMAKE_CXX_COMPILER_ID@": "Unknown",
+        "@_configure_date@": "Unknown",
+    }
+)
+
+cc_library(
+    name = "tbb",
+    srcs = [":version_string"] + glob(
+        [
+            "src/old/*.h",
+            "src/rml/client/*.h",
+            "src/rml/include/*.h",
+            "src/rml/server/*.h",
+            "src/tbb/*.h",
+            "src/tbb/tools_api/*.h",
+            "src/tbb/tools_api/legacy/*.h",
+            "src/old/*.cpp",
+            "src/tbb/*.cpp",
+        ],
+        exclude = ["src/old/test_*.cpp"],
+    ) + ["src/rml/client/rml_tbb.cpp"],
+    hdrs = glob(
+        [
+            "include/tbb/*",
+            "include/tbb/compat/*",
+            "include/tbb/internal/*",
+            "include/tbb/machine/*",
+        ],
+        exclude = ["include/tbb/scalable_allocator.h"],
+    ),
+    copts = [
+        "-Iexternal/tbb/src/rml/include",
+        "-Iexternal/tbb/src",
+        "-pthread",
+        "-DDO_ITT_NOTIFY=1",
+        "-DUSE_PTHREAD=1",
+        "-D__TBB_BUILD=1",
+
"-D__TBB_DYNAMIC_LOAD_ENABLED=0", + "-D__TBB_SOURCE_DIRECTLY_INCLUDED=1", + "-fno-sanitize=vptr", + "-fno-sanitize=thread", + ], + defines = [ + # TBB Cannot detect the standard library version when using clang with libstdc++. + # See https://github.com/01org/tbb/issues/22 + "TBB_USE_GLIBCXX_VERSION=(_GLIBCXX_RELEASE*10000)", + "TBB_PREVIEW_GLOBAL_CONTROL=1", + "TBB_PREVIEW_LOCAL_OBSERVER=1", + "__TBB_ALLOW_MUTABLE_FUNCTORS=1", + ], + includes = [ + "include", + "src/tbb/tools_api", + ], + linkopts = [ + "-ldl", + "-lpthread", + "-lrt", + ], + textual_hdrs = ["src/tbb/tools_api/ittnotify_static.c"], + visibility = ["//visibility:public"], +) diff --git a/third_party/tbb.patch b/third_party/tbb.patch new file mode 100644 index 00000000000..4a1f6845b77 --- /dev/null +++ b/third_party/tbb.patch @@ -0,0 +1,34 @@ +diff --git a/src/rml/server/rml_server.cpp b/src/rml/server/rml_server.cpp +index 2508465..1e22ad2 100644 +--- a/src/rml/server/rml_server.cpp ++++ b/src/rml/server/rml_server.cpp +@@ -3279,10 +3279,10 @@ extern "C" void __KMP_call_with_my_server_info( ::rml::server_info_callback_t cb + /* + * RML server info + */ +-#include "version_string.ver" ++#include "version_string.h" + + #ifndef __TBB_VERSION_STRINGS +-#pragma message("Warning: version_string.ver isn't generated properly by version_info.sh script!") ++#pragma message("Warning: version_string.h isn't generated properly by version_info.sh script!") + #endif + + // We use the build time as the RML server info. TBB is required to build RML, so we make it the same as the TBB build time. 
+diff --git a/src/tbb/tbb_version.h b/src/tbb/tbb_version.h +index dcaa55b..4981a8a 100644 +--- a/src/tbb/tbb_version.h ++++ b/src/tbb/tbb_version.h +@@ -25,10 +25,10 @@ + #ifndef ENDL + #define ENDL "\n" + #endif +-#include "version_string.ver" ++#include "version_string.h" + + #ifndef __TBB_VERSION_STRINGS +-#pragma message("Warning: version_string.ver isn't generated properly by version_info.sh script!") ++#pragma message("Warning: version_string.h isn't generated properly by version_info.sh script!") + // here is an example of macros value: + #define __TBB_VERSION_STRINGS \ + "TBB: BUILD_HOST\tUnknown\n" \ diff --git a/tools/config/BUILD b/tools/config/BUILD new file mode 100644 index 00000000000..a8f9d0452fc --- /dev/null +++ b/tools/config/BUILD @@ -0,0 +1,42 @@ +load("@bazel_skylib//lib:selects.bzl", "selects") + +config_setting( + name = "cuda", + define_values = { + "cuda": "true", + }, +) + +# Even when building with --config=cuda, host targets should be built with cuda disabled +# as these targets will run on CI machines that have no GPUs. +selects.config_setting_group( + name = "cuda_enabled_and_capable", + match_all = [ + ":cuda", + "//tools/toolchain:is_cuda_capable", + ], +) + +# Configures the system to build with cuda using clang. +config_setting( + name = "cuda_clang", + define_values = { + "cuda_clang": "true", + }, +) + +# Indicates that cuda code should be compiled with nvcc +# Mostly exists to support _analysis_ of tensorflow; more work is needed to actually make this +# setting work. +config_setting( + name = "cuda_nvcc", + define_values = { + "cuda_nvcc": "true", + }, +) + +config_setting( + name = "thread_sanitizer", + define_values = {"thread_sanitizer": "1"}, + visibility = ["//visibility:public"], +) diff --git a/tools/config/defs.bzl b/tools/config/defs.bzl new file mode 100644 index 00000000000..6ddd0e99156 --- /dev/null +++ b/tools/config/defs.bzl @@ -0,0 +1,65 @@ +""" + Macros for selecting with / without various GPU libraries. 
Most of these are meant to be used + directly by tensorflow in place of their build's own configure.py + bazel-gen system. +""" + +load("@bazel_skylib//lib:selects.bzl", "selects") + +def if_cuda(if_true, if_false = []): + """Helper for selecting based on whether CUDA is configured. """ + return selects.with_or({ + "@//tools/config:cuda_enabled_and_capable": if_true, + "//conditions:default": if_false, + }) + +def if_tensorrt(if_true, if_false = []): + """Helper for selecting based on whether TensorRT is configured. Currently a stub: only the default branch exists, so if_false is always selected. """ + return select({ + "//conditions:default": if_false, + }) + +def if_rocm(if_true, if_false = []): + """Helper for selecting based on whether ROCM is configured. Currently a stub: only the default branch exists, so if_false is always selected. """ + return select({ + "//conditions:default": if_false, + }) + +def if_sycl(if_true, if_false = []): + """Helper for selecting based on whether SYCL/ComputeCPP is configured. Currently a stub: only the default branch exists, so if_false is always selected.""" + + # NOTE: Tensorflow expects some strange behavior (see their if_sycl) if we + # actually plan on supporting this at some point. + return select({ + "//conditions:default": if_false, + }) + +def if_ccpp(if_true, if_false = []): + """Helper for selecting based on whether ComputeCPP is configured. Currently a stub: only the default branch exists, so if_false is always selected. """ + return select({ + "//conditions:default": if_false, + }) + +def cuda_default_copts(): + return if_cuda(["-DGOOGLE_CUDA=1"]) + +def cuda_default_features(): + return if_cuda(["-per_object_debug_info", "-use_header_modules", "cuda_clang"]) + +def rocm_default_copts(): + return if_rocm(["-x", "rocm"]) + +def rocm_copts(opts = []): + return rocm_default_copts() + if_rocm(opts) + +def cuda_is_configured(): + # FIXME(dcollins): currently only used by tensorflow's xla stuff, which we aren't building. However bazel + # query hits it so this needs to be defined. Because bazel doesn't actually resolve config at macro expansion + # time, `select` can't be used here (since xla expects lists of strings and not lists of select objects). 
+ # Instead, the xla build rules must be rewritten to use `if_cuda_is_configured` + return False + +def if_cuda_is_configured(x): + return if_cuda(x, []) + +def if_rocm_is_configured(x): + return if_rocm(x, []) diff --git a/tools/rules/BUILD b/tools/rules/BUILD new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/rules/cu.bzl b/tools/rules/cu.bzl new file mode 100644 index 00000000000..fa4b80db01c --- /dev/null +++ b/tools/rules/cu.bzl @@ -0,0 +1,3 @@ +# GPU support is not available: cu_library is a no-op stub that accepts and ignores all arguments. +def cu_library(**kwargs): + pass diff --git a/tools/rules/workspace.bzl b/tools/rules/workspace.bzl new file mode 100644 index 00000000000..b519ae925b4 --- /dev/null +++ b/tools/rules/workspace.bzl @@ -0,0 +1,29 @@ +def _impl(repository_ctx): # Repository rule implementation: tars the directory containing @<name>_unpatched//:README, extracts it here, applies the patches, then copies the build file in as BUILD.bazel. + archive = repository_ctx.attr.name + ".tar" + reference = Label("@%s_unpatched//:README" % repository_ctx.attr.name) + dirname = repository_ctx.path(reference).dirname + repository_ctx.execute(["tar", "hcf", archive, "-C", dirname, "."]) # tar flags: h = follow symlinks, c = create, f = archive file. NOTE(review): the exit status is not checked. + repository_ctx.extract(archive) + for patch in repository_ctx.attr.patches: + repository_ctx.patch(repository_ctx.path(patch), repository_ctx.attr.patch_strip) + build_file = repository_ctx.path(repository_ctx.attr.build_file) + repository_ctx.execute(["cp", build_file, "BUILD.bazel"]) # NOTE(review): the exit status of cp is not checked. + +_patched_rule = repository_rule( + implementation = _impl, + attrs = { + "patches": attr.label_list(), + "patch_strip": attr.int(), + "build_file": attr.label(), + }, +) + +def new_patched_local_repository(name, path, **kwargs): # Declares @<name>_unpatched via new_local_repository at `path`, then @<name> via _patched_rule, forwarding the remaining kwargs. + native.new_local_repository( + name = name + "_unpatched", + build_file_content = """ +pkg_tar(name = "content", srcs = glob(["**"])) +""", + path = path, + ) + _patched_rule(name = name, **kwargs)