From 67570a60ba4e3ba3aa1dcae4b44804a7e710cfa6 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 16 Sep 2021 12:37:10 -0700 Subject: [PATCH] Disable ParallelTBB (#65092) Summary: As ParallelTBB's `at::get_thread_num` is not compatible with the general model used by OpenMP and ParallelNative (where it is a contiguous thread index within a parallel loop), see https://github.com/pytorch/pytorch/issues/64571#issuecomment-914691883 More examples of similar regressions: https://github.com/pytorch/pytorch/runs/3612142217 Pull Request resolved: https://github.com/pytorch/pytorch/pull/65092 Reviewed By: zhouzhuojie Differential Revision: D30995936 Pulled By: malfet fbshipit-source-id: db145b6a850d794f2c954f59f30249b291473e36 --- .github/generated-ciflow-ruleset.json | 3 - .github/scripts/generate_ci_workflows.py | 27 +- ...-paralleltbb-linux-xenial-py3.6-gcc5.4.yml | 551 ------------------ CMakeLists.txt | 2 +- caffe2/CMakeLists.txt | 1 + 5 files changed, 16 insertions(+), 568 deletions(-) delete mode 100644 .github/workflows/generated-paralleltbb-linux-xenial-py3.6-gcc5.4.yml diff --git a/.github/generated-ciflow-ruleset.json b/.github/generated-ciflow-ruleset.json index f739a316d35..b96077abdde 100644 --- a/.github/generated-ciflow-ruleset.json +++ b/.github/generated-ciflow-ruleset.json @@ -12,7 +12,6 @@ "linux-xenial-py3.6-gcc5.4", "linux-xenial-py3.6-gcc7-bazel-test", "parallelnative-linux-xenial-py3.6-gcc5.4", - "paralleltbb-linux-xenial-py3.6-gcc5.4", "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7", "periodic-linux-xenial-cuda11.1-py3.6-gcc7", "periodic-win-vs2019-cuda11.1-py3", @@ -33,7 +32,6 @@ "linux-xenial-py3.6-gcc5.4", "linux-xenial-py3.6-gcc7-bazel-test", "parallelnative-linux-xenial-py3.6-gcc5.4", - "paralleltbb-linux-xenial-py3.6-gcc5.4", "puretorch-linux-xenial-py3.6-gcc5.4", "win-vs2019-cpu-py3" ], @@ -74,7 +72,6 @@ "linux-xenial-py3.6-gcc5.4", "linux-xenial-py3.6-gcc7-bazel-test", "parallelnative-linux-xenial-py3.6-gcc5.4", - 
"paralleltbb-linux-xenial-py3.6-gcc5.4", "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7", "periodic-linux-xenial-cuda11.1-py3.6-gcc7", "puretorch-linux-xenial-py3.6-gcc5.4" diff --git a/.github/scripts/generate_ci_workflows.py b/.github/scripts/generate_ci_workflows.py index 021e17cb3a0..2be99767452 100755 --- a/.github/scripts/generate_ci_workflows.py +++ b/.github/scripts/generate_ci_workflows.py @@ -295,19 +295,20 @@ LINUX_WORKFLOWS = [ labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU} ), ), - CIWorkflow( - arch="linux", - build_environment="paralleltbb-linux-xenial-py3.6-gcc5.4", - docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4", - test_runner_type=LINUX_CPU_TEST_RUNNER, - # This is a master only job despite on_pull_request is set to True - on_pull_request=True, - ciflow_config=CIFlowConfig( - enabled=True, - trigger_action_only=True, - labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU}, - ), - ), + # ParallelTBB does not have a maintainer and is currently flaky + # CIWorkflow( + # arch="linux", + # build_environment="paralleltbb-linux-xenial-py3.6-gcc5.4", + # docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4", + # test_runner_type=LINUX_CPU_TEST_RUNNER, + # # This is a master only job despite on_pull_request is set to True + # on_pull_request=True, + # ciflow_config=CIFlowConfig( + # enabled=True, + # trigger_action_only=True, + # labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU}, + # ), + # ), CIWorkflow( arch="linux", build_environment="parallelnative-linux-xenial-py3.6-gcc5.4", diff --git a/.github/workflows/generated-paralleltbb-linux-xenial-py3.6-gcc5.4.yml b/.github/workflows/generated-paralleltbb-linux-xenial-py3.6-gcc5.4.yml deleted file mode 100644 index e71e998acb5..00000000000 --- a/.github/workflows/generated-paralleltbb-linux-xenial-py3.6-gcc5.4.yml +++ /dev/null @@ -1,551 +0,0 @@ -# @generated DO NOT EDIT MANUALLY -# Template is at: 
.github/templates/linux_ci_workflow.yml.j2 -# Generation script: .github/scripts/generate_ci_workflows.py -name: paralleltbb-linux-xenial-py3.6-gcc5.4 - -on: - pull_request: - types: [unassigned] - push: - branches: - - master - - release/* - workflow_dispatch: - -env: - BUILD_ENVIRONMENT: paralleltbb-linux-xenial-py3.6-gcc5.4 - DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4 - SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 - XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla - TORCH_CUDA_ARCH_LIST: 5.2 - IN_CI: 1 - # This is used for the phase of adding wheel tests only, will be removed once completed - IN_WHEEL_TEST: 1 - # Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh - CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts - ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" - PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} - -concurrency: - group: paralleltbb-linux-xenial-py3.6-gcc5.4-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true - -jobs: - ciflow_should_run: - runs-on: ubuntu-18.04 - if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux'))) }} - env: - LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} - steps: - - name: noop - run: echo running ciflow_should_run - - name: print labels - run: echo "${LABELS}" - calculate-docker-image: - runs-on: linux.2xlarge - needs: [ciflow_should_run] - env: - DOCKER_BUILDKIT: 1 - timeout-minutes: 90 - outputs: - docker_image: ${{ steps.calculate-tag.outputs.docker_image }} - steps: - - name: 
Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh - bash /tmp/ecr-login.sh - rm /tmp/ecr-login.sh - - name: Chown workspace - env: - ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - # Ensure the working directory gets chowned back to the current user - retry docker pull "${ALPINE_IMAGE}" - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE:?}/*" - rm -f ~/.ssh/authorized_keys - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - # deep clone, to allow use of git merge-base - fetch-depth: 0 - submodules: false - - name: Calculate docker image tag - id: calculate-tag - run: | - DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker) - echo "::set-output name=docker_tag::${DOCKER_TAG}" - echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" - - name: Check if image should be built - id: check - env: - DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }} - BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }} - run: | - set -x - # Check if image already exists, if it does then skip building it - if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then - exit 0 - fi - if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then - # if we're on the base branch then use the parent commit - MERGE_BASE=$(git rev-parse HEAD~) - else - # otherwise we're on a PR, so use the most recent base commit - MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION") - fi - # Covers the case where a previous tag doesn't exist for the tree - # this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly - if ! 
git rev-parse "$MERGE_BASE:.circleci/docker"; then - echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit" - exit 1 - fi - PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker") - # If no image exists but the hash is the same as the previous hash then we should error out here - if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then - echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch" - echo " contact the PyTorch team to restore the original images" - exit 1 - fi - echo ::set-output name=rebuild::yes - - name: Build and push docker image - if: ${{ steps.check.outputs.rebuild }} - env: - DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }} - DOCKER_SKIP_S3_UPLOAD: 1 - run: | - export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/} - cd .circleci/docker && ./build_docker.sh - - build: - runs-on: linux.2xlarge - needs: [calculate-docker-image, ciflow_should_run] - env: - DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }} - JOB_BASE_NAME: paralleltbb-linux-xenial-py3.6-gcc5.4-build - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh - bash /tmp/ecr-login.sh - rm /tmp/ecr-login.sh - - name: Chown workspace - env: - ALPINE_IMAGE: 
"308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - # Ensure the working directory gets chowned back to the current user - retry docker pull "${ALPINE_IMAGE}" - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE:?}/*" - rm -f ~/.ssh/authorized_keys - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - # deep clone, to allow use of git merge-base - fetch-depth: 0 - submodules: recursive - - name: Pull docker image - run: | - docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch - run: | - # detached container should get cleaned up by teardown_ec2_linux - container_name=$(docker run \ - -e BUILD_ENVIRONMENT \ - -e JOB_BASE_NAME \ - -e MAX_JOBS="$(nproc --ignore=2)" \ - -e SCCACHE_BUCKET \ - -e XLA_CLANG_CACHE_S3_BUCKET_NAME \ - -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \ - -e SKIP_SCCACHE_INITIALIZATION=1 \ - -e TORCH_CUDA_ARCH_LIST \ - -e PR_LABELS \ - -e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \ - --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ - --security-opt seccomp=unconfined \ - --cap-add=SYS_PTRACE \ - --tty \ - --detach \ - --user jenkins \ - -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ - -w /var/lib/jenkins/workspace \ - "${DOCKER_IMAGE}" - ) - 
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh' - - name: Parse ref - id: parse-ref - run: .github/scripts/parse_ref.py - - name: Display and upload binary build size statistics (Click Me) - # temporary hack: set CIRCLE_* vars, until we update - # tools/stats/print_test_stats.py to natively support GitHub Actions - env: - AWS_DEFAULT_REGION: us-east-1 - IS_GHA: 1 - SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }} - CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }} - CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }} - CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }} - CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }} - CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}' - run: | - COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0) - export COMMIT_TIME - pip3 install requests==2.26 boto3==1.16.34 - python3 -m tools.stats.upload_binary_size_to_scuba || exit 0 - - name: Chown workspace - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Archive artifacts into zip - run: | - zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json - - uses: seemethere/upload-artifact-s3@v3 - name: Store PyTorch Build Artifacts on S3 - with: - name: ${{ env.BUILD_ENVIRONMENT }} - retention-days: 14 - if-no-files-found: error - path: - artifacts.zip - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - env: - ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Clean up docker images - if: always() - run: | - # Prune all of the docker images - docker system prune -af - - generate-test-matrix: - runs-on: ubuntu-18.04 - needs: [ciflow_should_run] - env: - TEST_RUNNER_TYPE: linux.2xlarge - ENABLE_DISTRIBUTED_TEST: 1 - ENABLE_JIT_LEGACY_TEST: '' - ENABLE_MULTIGPU_TEST: '' - ENABLE_NOGPU_NO_AVX_TEST: '' - ENABLE_NOGPU_NO_AVX2_TEST: '' - ENABLE_SLOW_TEST: '' - ENABLE_DOCS_TEST: '' - ENABLE_BACKWARDS_COMPAT_TEST: '' - ENABLE_XLA_TEST: '' - ENABLE_NOARCH_TEST: '' - NUM_TEST_SHARDS: 1 - MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu - NOGPU_RUNNER_TYPE: linux.2xlarge - PR_BODY: ${{ github.event.pull_request.body }} - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - render-matrix: ${{ 
steps.set-matrix.outputs.render-matrix }} - ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }} - container: - image: python:3.9 - steps: - - name: Install dependencies - run: pip install typing-extensions==3.10 - - name: Clone pytorch/pytorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - - name: Generating test matrix - id: set-matrix - run: .github/scripts/generate_pytorch_test_matrix.py - - test: - needs: [calculate-docker-image, build, generate-test-matrix, ciflow_should_run] - strategy: - matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }} - fail-fast: false - runs-on: ${{ matrix.runner }} - env: - DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }} - JOB_BASE_NAME: paralleltbb-linux-xenial-py3.6-gcc5.4-test - TEST_CONFIG: ${{ matrix.config }} - SHARD_NUMBER: ${{ matrix.shard }} - NUM_TEST_SHARDS: ${{ matrix.num_shards }} - PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }} - CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }} - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh - bash /tmp/ecr-login.sh - rm /tmp/ecr-login.sh - - name: Chown workspace - env: - ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" - run: | 
- retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - # Ensure the working directory gets chowned back to the current user - retry docker pull "${ALPINE_IMAGE}" - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE:?}/*" - rm -f ~/.ssh/authorized_keys - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - # deep clone, to allow use of git merge-base - fetch-depth: 0 - submodules: recursive - - name: Pull docker image - run: | - docker pull "${DOCKER_IMAGE}" - - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }} - run: | - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - - name: Determine shm-size - run: | - shm_size="1g" - case "${BUILD_ENVIRONMENT}" in - *cuda*) - shm_size="2g" - ;; - *rocm*) - shm_size="8g" - ;; - esac - echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}" - - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b - name: Download PyTorch Build Artifacts - with: - name: ${{ env.BUILD_ENVIRONMENT }} - - name: Unzip artifacts - run: | - unzip -o artifacts.zip - - name: Output disk space left - run: | - sudo df -H - - name: Test PyTorch - env: - PR_NUMBER: ${{ github.event.pull_request.number }} - run: | - if [[ $TEST_CONFIG == 'multigpu' ]]; then - TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh - else - TEST_COMMAND=.jenkins/pytorch/test.sh - fi - if [[ $NUM_TEST_SHARDS -ne 2 ]]; then - export SHARD_NUMBER=0 - fi - # 
detached container should get cleaned up by teardown_ec2_linux - # TODO: Stop building test binaries as part of the build phase - # Used for GPU_FLAG since that doesn't play nice - # shellcheck disable=SC2086 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BUILD_ENVIRONMENT \ - -e PR_NUMBER \ - -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \ - -e GITHUB_ACTIONS \ - -e IN_CI \ - -e IN_WHEEL_TEST \ - -e SHARD_NUMBER \ - -e JOB_BASE_NAME \ - -e TEST_CONFIG \ - -e NUM_TEST_SHARDS \ - -e PYTORCH_IGNORE_DISABLED_ISSUES \ - -e PR_LABELS \ - -e CONTINUE_THROUGH_ERROR \ - -e MAX_JOBS="$(nproc --ignore=2)" \ - -e SCCACHE_BUCKET \ - -e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \ - -e XLA_CLANG_CACHE_S3_BUCKET_NAME \ - --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ - --security-opt seccomp=unconfined \ - --cap-add=SYS_PTRACE \ - --shm-size="${SHM_SIZE}" \ - --tty \ - --detach \ - --name="${container_name}" \ - --user jenkins \ - -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ - -w /var/lib/jenkins/workspace \ - "${DOCKER_IMAGE}" - ) - docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}" - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Install render_test_results dependencies - if: always() - shell: bash - run: | - python3 -m pip install junitparser==2.1.1 rich==10.9.0 - - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]" - if: always() - shell: bash - # Encoding is weird on windows, just try to default to utf-8 if possible - env: - PYTHONIOENCODING: "utf-8" - run: | - python3 tools/render_junit.py test/ - - name: Zip test reports for upload - if: always() - env: - FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}' - run: | - # Remove any previous test reports if they exist - rm -f test-reports-*.zip - zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml' - - uses: actions/upload-artifact@v2 - name: Store Test Reports - if: always() - with: - name: test-reports-${{ matrix.config }} - retention-days: 14 - if-no-files-found: error - path: - test-reports-*.zip - - uses: seemethere/upload-artifact-s3@v3 - name: Store Test Reports on S3 - if: always() - with: - retention-days: 14 - if-no-files-found: error - path: - test-reports-*.zip - - name: Parse ref - id: parse-ref - run: .github/scripts/parse_ref.py - - name: Display and upload test statistics (Click Me) - if: always() - # temporary hack: set CIRCLE_* vars, until we update - # tools/stats/print_test_stats.py to natively support GitHub Actions - env: - AWS_DEFAULT_REGION: us-east-1 - CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }} - JOB_BASE_NAME: paralleltbb-linux-xenial-py3.6-gcc5.4-test - CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }} - CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }} - CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }} - CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}' - shell: bash - run: | - python3 -m pip install -r requirements.txt - python3 -m pip install boto3==1.16.34 - python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test - - 
name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - env: - ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af diff --git a/CMakeLists.txt b/CMakeLists.txt index 0956b6ad2d8..d93af48e62a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -309,7 +309,7 @@ cmake_dependent_option( cmake_dependent_option( USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON "USE_DISTRIBUTED" OFF) -option(USE_TBB "Use TBB" OFF) +option(USE_TBB "Use TBB (Deprecated)" OFF) cmake_dependent_option( USE_SYSTEM_TBB "Use system-provided Intel TBB." OFF "USE_TBB" OFF) option(ONNX_ML "Enable traditional ONNX ML API." ON) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 3c2fb83d443..6380e7a959d 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -47,6 +47,7 @@ elseif("${ATEN_THREADING}" STREQUAL "TBB") if(NOT USE_TBB) message(FATAL_ERROR "Using TBB backend but USE_TBB is off") endif() + message(WARNING "ATEN TBB Threading is deprecated.") set(AT_PARALLEL_NATIVE_TBB 1) else() message(FATAL_ERROR "Unknown ATen parallel backend: ${ATEN_THREADING}")