Adding windows CUDA smoke tests on PRs (#59686)

Summary: Adds Windows CUDA smoke tests on PRs (master should run the full suite).
Next step: automate the data update so that we get a new smoke test list without manual effort.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59686
Test Plan: https://github.com/pytorch/pytorch/actions/runs/958296267 (the sharded smoke tests still take a long time because of dependency installation).
Reviewed By: walterddr
Differential Revision: D29243533
Pulled By: janeyx99
fbshipit-source-id: dde7ba127fa15c95bda0e833cc5311598fb85e2b
2025-12-06 12:20:52 +01:00 · 2021-06-23 10:12:13 -07:00 · 2021-06-23 10:12:13 -07:00 · c63a0d0cfe
commit c63a0d0cfe
parent 8162439cbd
11 changed files with 70 additions and 16 deletions
--- a/.github/scripts/generate_ci_workflows.py
+++ b/.github/scripts/generate_ci_workflows.py
@ -88,7 +88,6 @@ WINDOWS_WORKFLOWS = [
        cuda_version="10.1",
        test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
        on_pull_request=True,
        only_build_on_pull_request=True,
        num_test_shards=2,
    ),
    PyTorchWindowsWorkflow(
--- a/.github/templates/windows_ci_workflow.yml.j2
+++ b/.github/templates/windows_ci_workflow.yml.j2
@ -170,6 +170,9 @@ jobs:
            else
              export SHARD_NUMBER=0
            fi
            if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
              export RUN_SMOKE_TESTS_ONLY=1
            fi
            .jenkins/pytorch/win-test.sh
      - uses: actions/upload-artifact@v2
        name: Store PyTorch Test Reports
--- a/.github/workflows/pytorch-win-vs2019-cpu-py3.yml
+++ b/.github/workflows/pytorch-win-vs2019-cpu-py3.yml
@ -138,6 +138,9 @@ jobs:
            else
              export SHARD_NUMBER=0
            fi
            if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
              export RUN_SMOKE_TESTS_ONLY=1
            fi
            .jenkins/pytorch/win-test.sh
      - uses: actions/upload-artifact@v2
        name: Store PyTorch Test Reports
--- a/.github/workflows/pytorch-win-vs2019-cuda10-cudnn7-py3.yml
+++ b/.github/workflows/pytorch-win-vs2019-cuda10-cudnn7-py3.yml
@ -80,7 +80,7 @@ jobs:
          path: C:\w\build-results
  generate-test-matrix:
-    if: ${{ github.repository_owner == 'pytorch' && github.event_name == 'push' }}
+    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: ubuntu-18.04
    env:
      NUM_TEST_SHARDS: 2
@ -100,7 +100,6 @@ jobs:
          echo "::set-output name=matrix::${MATRIX}"
  test:
    if: ${{ github.event_name == 'push' }}
    runs-on: windows.8xlarge.nvidia.gpu
    env:
      JOB_BASE_NAME: pytorch-win-vs2019-cuda10-cudnn7-py3-test
@ -157,6 +156,9 @@ jobs:
            else
              export SHARD_NUMBER=0
            fi
            if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
              export RUN_SMOKE_TESTS_ONLY=1
            fi
            .jenkins/pytorch/win-test.sh
      - uses: actions/upload-artifact@v2
        name: Store PyTorch Test Reports
@ -174,7 +176,7 @@ jobs:
  # logs (like test); we can always move it back to the other one, but it
  # doesn't create the best experience
  render_test_results:
-    if:  ${{ github.event_name == 'push' && always() }}
+    if: always()
    needs:
      - test
    runs-on: ubuntu-18.04
--- a/.github/workflows/pytorch-win-vs2019-cuda11-cudnn8-py3.yml
+++ b/.github/workflows/pytorch-win-vs2019-cuda11-cudnn8-py3.yml
@ -155,6 +155,9 @@ jobs:
            else
              export SHARD_NUMBER=0
            fi
            if [[ -n $GITHUB_HEAD_REF && "$USE_CUDA" == 1 ]]; then
              export RUN_SMOKE_TESTS_ONLY=1
            fi
            .jenkins/pytorch/win-test.sh
      - uses: actions/upload-artifact@v2
        name: Store PyTorch Test Reports
--- a/.gitignore
+++ b/.gitignore
@ -15,6 +15,7 @@ coverage.xml
 .hypothesis
 .mypy_cache
 /.extracted_scripts/
 **/.pytorch_specified_test_cases.csv
 **/.pytorch-test-times.json
 **/.pytorch-slow-tests.json
 */*.pyc
--- a/.jenkins/pytorch/win-test-helpers/test_python.bat
+++ b/.jenkins/pytorch/win-test-helpers/test_python.bat
@ -1,3 +1,13 @@
 call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
-cd test && python run_test.py --exclude-jit-executor --verbose --determine-from="%1" && cd ..
+pushd test
 if "%RUN_SMOKE_TESTS_ONLY%"=="1" (
    :: Download specified test cases to run
    curl --retry 3 -k https://raw.githubusercontent.com/pytorch/test-infra/master/stats/windows_smoke_tests.csv --output .pytorch_specified_test_cases.csv
    if ERRORLEVEL 1 exit /b 1
    python run_test.py --exclude-jit-executor --verbose --determine-from="%1" --run-specified-test-cases
 ) else (
    python run_test.py --exclude-jit-executor --verbose --determine-from="%1"
 )
 popd
 if ERRORLEVEL 1 exit /b 1
--- a/.jenkins/pytorch/win-test-helpers/test_python_first_shard.bat
+++ b/.jenkins/pytorch/win-test-helpers/test_python_first_shard.bat
@ -4,7 +4,18 @@ echo Copying over test times file
 copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-times.json" "%TEST_DIR_WIN%"
 pushd test
-python run_test.py --exclude-jit-executor --shard 1 2 --verbose --determine-from="%1"
+
 echo Run nn tests
 if "%RUN_SMOKE_TESTS_ONLY%"=="1" (
    :: Download specified test cases to run
    curl --retry 3 -k https://raw.githubusercontent.com/janeyx99/test-infra/add-windows-smoke-tests/stats/windows_smoke_tests.csv --output .pytorch_specified_test_cases.csv
    if ERRORLEVEL 1 exit /b 1
    python run_test.py --exclude-jit-executor --shard 1 2 --verbose --determine-from="%1" --run-specified-test-cases
 ) else (
    python run_test.py --exclude-jit-executor --shard 1 2 --verbose --determine-from="%1"
 )
 if ERRORLEVEL 1 exit /b 1
 popd
--- a/.jenkins/pytorch/win-test-helpers/test_python_second_shard.bat
+++ b/.jenkins/pytorch/win-test-helpers/test_python_second_shard.bat
@ -3,6 +3,18 @@ call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
 echo Copying over test times file
 copy /Y "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.pytorch-test-times.json" "%TEST_DIR_WIN%"
-cd test && python run_test.py --exclude-jit-executor --shard 2 2 --verbose --determine-from="%1" && cd ..
+pushd test
 if "%RUN_SMOKE_TESTS_ONLY%"=="1" (
    :: Download specified test cases to run
    curl --retry 3 -k https://raw.githubusercontent.com/pytorch/test-infra/master/stats/windows_smoke_tests.csv --output .pytorch_specified_test_cases.csv
    if ERRORLEVEL 1 exit /b 1
    python run_test.py --exclude-jit-executor --shard 2 2 --verbose --determine-from="%1" --run-specified-test-cases
 ) else (
    python run_test.py --exclude-jit-executor --shard 2 2 --verbose --determine-from="%1"
 )
 popd
 if ERRORLEVEL 1 exit /b 1
--- a/.jenkins/pytorch/win-test.sh
+++ b/.jenkins/pytorch/win-test.sh
@ -66,9 +66,12 @@ run_tests() {
    if [[ ( -z "${JOB_BASE_NAME}" || "${JOB_BASE_NAME}" == *-test ) && $NUM_TEST_SHARDS -eq 1 ]]; then
        "$SCRIPT_HELPERS_DIR"/test_python.bat "$DETERMINE_FROM"
        if [[ -z ${RUN_SMOKE_TESTS_ONLY} ]]; then
          "$SCRIPT_HELPERS_DIR"/test_custom_script_ops.bat
          "$SCRIPT_HELPERS_DIR"/test_custom_backend.bat
          "$SCRIPT_HELPERS_DIR"/test_libtorch.bat
        fi
    else
        if [[ "${BUILD_ENVIRONMENT}" == "pytorch-win-vs2019-cpu-py3" ]]; then
          export PYTORCH_COLLECT_COVERAGE=1
@ -76,16 +79,23 @@ run_tests() {
        fi
        if [[ "${JOB_BASE_NAME}" == *-test1 || "${SHARD_NUMBER}" == 1 ]]; then
            "$SCRIPT_HELPERS_DIR"/test_python_first_shard.bat "$DETERMINE_FROM"
            if [[ -z ${RUN_SMOKE_TESTS_ONLY} ]]; then
              "$SCRIPT_HELPERS_DIR"/test_libtorch.bat
              if [[ "${USE_CUDA}" == "1" ]]; then
                "$SCRIPT_HELPERS_DIR"/test_python_jit_legacy.bat "$DETERMINE_FROM"
              fi
            fi
        elif [[ "${JOB_BASE_NAME}" == *-test2 || "${SHARD_NUMBER}" == 2 ]]; then
            "$SCRIPT_HELPERS_DIR"/test_python_second_shard.bat "$DETERMINE_FROM"
            if [[ -z ${RUN_SMOKE_TESTS_ONLY} ]]; then
              "$SCRIPT_HELPERS_DIR"/test_custom_backend.bat
              "$SCRIPT_HELPERS_DIR"/test_custom_script_ops.bat
            fi
        fi
    fi
 }
 run_tests
--- a/test/run_test.py
+++ b/test/run_test.py
@ -964,7 +964,7 @@ def get_selected_tests(options):
        assert len(options.shard) == 2, "Unexpected shard format"
        assert min(options.shard) > 0, "Shards must be positive numbers"
        which_shard, num_shards = options.shard
-        assert which_shard <= num_shards, "Selected shard must be less or equal that total number of shards"
+        assert which_shard <= num_shards, "Selected shard must be less than or equal to total number of shards"
        assert num_shards <= len(selected_tests), f"Number of shards must be less than {len(selected_tests)}"
        selected_tests = get_shard(which_shard, num_shards, selected_tests)