Mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)
[ci] delete generate-test-matrix
Today, we have two pieces that conspire to determine what workflows we run:

- `generate_ci_workflows.py`, which takes a declarative description of what we want the workflow to do and uses jinja to generate a workflow yaml file.
- `generate-test-matrix`, which runs at CI time to dynamically generate test jobs.

This is bad:

- Having one layer of code generation is unfortunate; having two is confusing.
- You cannot tell from a workflow yaml file what test jobs will be run.
- We have to do a careful dance of plumbing the args to `generate-test-matrix` through env vars and other such ugliness.
- In cases where the build job fails and prevents `generate-test-matrix` from running, a ghost `test` job that doesn't actually exist noises up the HUD and our stats.
- A bunch of useless `generate-test-matrix` jobs (8 on PRs) noise up our signal.

As far as I can tell, this complexity is unnecessary: we have all the information we need to generate the test matrix statically. There does not appear to be any advantage in retaining `generate-test-matrix`, so I am removing it to simplify the CI.

The *only* place where we were actually doing something dynamic is in our windows gpu workflow, where we would check at runtime whether the workflow was triggered from a PR or master and behave accordingly. This is more simply done by having two separate workflows with different trigger conditions, which avoids the madness of needing to parse labels and fork the behavior dynamically, which has been a source of confusion in the past.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/73001
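To make the "generate statically" idea concrete, here is a minimal, hypothetical sketch (simplified names, not the actual `generate_ci_workflows.py` code) of expanding a declarative workflow description into explicit test jobs at generation time, so that every job is visible in the checked-in yaml:

```python
# Hypothetical sketch: all inputs are known when the workflow yaml is
# generated, so the test matrix can be expanded statically.
from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class Workflow:
    build_environment: str
    test_runner_type: str
    num_test_shards: int = 1
    # extra single-shard configs, e.g. {"noarch": "linux.2xlarge"}
    extra_configs: Dict[str, str] = field(default_factory=dict)

    def gen_test_jobs(self) -> List[dict]:
        # One dict per shard of the default config...
        jobs = [
            {
                "id": f"test_default_{shard}_{self.num_test_shards}",
                "config": "default",
                "shard": shard,
                "num_shards": self.num_test_shards,
                "runner": self.test_runner_type,
            }
            for shard in range(1, self.num_test_shards + 1)
        ]
        # ...plus one dict per extra single-shard config.
        for name, runner in self.extra_configs.items():
            jobs.append({"id": f"test_{name}_1_1", "config": name,
                         "shard": 1, "num_shards": 1, "runner": runner})
        return jobs


wf = Workflow("linux-bionic-py3.7-clang9", "linux.2xlarge",
              num_test_shards=2, extra_configs={"noarch": "linux.2xlarge"})
for job in wf.gen_test_jobs():
    print(job["id"], "->", job["runner"])
```

Each dict becomes a standalone job in the generated workflow file, so no runtime matrix-generation job (and no env-var plumbing) is needed.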
Parent: 2c916ef198
Commit: 2cd0667928
.github/actionlint.yaml (vendored): 1 line changed

@@ -9,3 +9,4 @@ self-hosted-runner:
   - windows.4xlarge
   - windows.8xlarge.nvidia.gpu
   - bm-runner
+  - linux.rocm.gpu
.github/generated-ciflow-ruleset.json (generated, vendored): 14 lines changed

@@ -44,7 +44,8 @@
       "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
       "pytorch-xla-linux-bionic-py3.7-clang8",
       "win-vs2019-cpu-py3",
-      "win-vs2019-cuda11.3-py3"
+      "win-vs2019-cuda11.3-py3",
+      "win-vs2019-cuda11.3-py3-smoke"
     ],
     "ciflow/android": [
       "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
@@ -120,7 +121,8 @@
       "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
       "periodic-win-vs2019-cuda11.1-py3",
       "periodic-win-vs2019-cuda11.5-py3",
-      "win-vs2019-cuda11.3-py3"
+      "win-vs2019-cuda11.3-py3",
+      "win-vs2019-cuda11.3-py3-smoke"
     ],
     "ciflow/default": [
       "linux-binary-conda",
@@ -149,7 +151,7 @@
       "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
       "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
       "win-vs2019-cpu-py3",
-      "win-vs2019-cuda11.3-py3",
+      "win-vs2019-cuda11.3-py3-smoke",
       "windows-binary-libtorch-cxx11-abi",
       "windows-binary-libtorch-pre-cxx11",
       "windows-binary-wheel"
@@ -281,7 +283,8 @@
       "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
       "pytorch-xla-linux-bionic-py3.7-clang8",
       "win-vs2019-cpu-py3",
-      "win-vs2019-cuda11.3-py3"
+      "win-vs2019-cuda11.3-py3",
+      "win-vs2019-cuda11.3-py3-smoke"
     ],
     "ciflow/vulkan": [
       "linux-vulkan-bionic-py3.7-clang9"
@@ -290,7 +293,8 @@
       "periodic-win-vs2019-cuda11.1-py3",
       "periodic-win-vs2019-cuda11.5-py3",
       "win-vs2019-cpu-py3",
-      "win-vs2019-cuda11.3-py3"
+      "win-vs2019-cuda11.3-py3",
+      "win-vs2019-cuda11.3-py3-smoke"
     ],
     "ciflow/xla": [
       "pytorch-xla-linux-bionic-py3.7-clang8"
.github/scripts/generate_ci_workflows.py (vendored): 185 lines changed
@@ -2,17 +2,16 @@

 from dataclasses import asdict, dataclass, field
 from pathlib import Path
-from typing import Dict, Set, List, Iterable
+from typing import Dict, Set, List, Iterable, Any

 import jinja2
 import json
 import os
 import sys
-from typing_extensions import Literal
+from typing_extensions import Literal, TypedDict

 import generate_binary_build_matrix  # type: ignore[import]

-YamlShellBool = Literal["''", 1]
 Arch = Literal["windows", "linux", "macos"]

 DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com"
@@ -142,6 +141,11 @@ class CIFlowRuleset:
             outfile.write('\n')


+class Config(TypedDict):
+    num_shards: int
+    runner: str
+
+
 @dataclass
 class CIWorkflow:
     # Required fields
@@ -162,50 +166,38 @@ class CIWorkflow:
     is_scheduled: str = ''
     is_default: bool = False
     num_test_shards: int = 1
-    only_run_smoke_tests_on_pull_request: bool = False
-    num_test_shards_on_pull_request: int = -1
-    distributed_test: bool = True
     timeout_after: int = 240
     xcode_version: str = ''
+    only_on_pr: bool = False
     ios_arch: str = ''
     ios_platform: str = ''
+    test_jobs: Any = field(default_factory=list)

-    # The following variables will be set as environment variables,
-    # so it's easier for both shell and Python scripts to consume it if false is represented as the empty string.
-    enable_jit_legacy_test: YamlShellBool = "''"
-    enable_distributed_test: YamlShellBool = "''"
-    enable_multigpu_test: YamlShellBool = "''"
-    enable_nogpu_no_avx_test: YamlShellBool = "''"
-    enable_nogpu_no_avx2_test: YamlShellBool = "''"
-    enable_slow_test: YamlShellBool = "''"
-    enable_docs_test: YamlShellBool = "''"
-    enable_backwards_compat_test: YamlShellBool = "''"
-    enable_xla_test: YamlShellBool = "''"
-    enable_noarch_test: YamlShellBool = "''"
-    enable_force_on_cpu_test: YamlShellBool = "''"
+    enable_default_test: bool = True
+    enable_smoke_test: bool = True
+    enable_jit_legacy_test: bool = False
+    enable_distributed_test: bool = True
+    enable_multigpu_test: bool = False
+    enable_nogpu_no_avx_test: bool = False
+    enable_nogpu_no_avx2_test: bool = False
+    enable_slow_test: bool = False
+    enable_docs_test: bool = False
+    enable_backwards_compat_test: bool = False
+    enable_xla_test: bool = False
+    enable_noarch_test: bool = False
+    enable_force_on_cpu_test: bool = False

     def __post_init__(self) -> None:
         if not self.build_generates_artifacts:
             self.exclude_test = True

-        if self.distributed_test:
-            self.enable_distributed_test = 1
-
         self.multigpu_runner_type = LINUX_MULTIGPU_RUNNERS.get(self.test_runner_type, "linux.16xlarge.nvidia.gpu")
         self.distributed_gpu_runner_type = LINUX_DISTRIBUTED_GPU_RUNNERS.get(self.test_runner_type, "linux.8xlarge.nvidia.gpu")

         if LABEL_CIFLOW_DEFAULT in self.ciflow_config.labels:
             self.is_default = True

-        # If num_test_shards_on_pull_request is not user-defined, default to num_test_shards unless we are
-        # only running smoke tests on the pull request.
-        if self.num_test_shards_on_pull_request == -1:
-            # Don't run the default if we are only running smoke tests
-            if self.only_run_smoke_tests_on_pull_request:
-                self.num_test_shards_on_pull_request = 0
-            else:
-                self.num_test_shards_on_pull_request = self.num_test_shards
+        self.test_jobs = self._gen_test_jobs()
         self.assert_valid()

     def assert_valid(self) -> None:
@@ -254,6 +246,83 @@ class CIWorkflow:
                 output_file.write("\n")
         print(output_file_path)

+    def normalized_build_environment(self, suffix: str) -> str:
+        return self.build_environment.replace(".", "_") + suffix
+
+    def _gen_test_jobs(self) -> Any:
+        if self.arch == "linux":
+            MULTIGPU_RUNNER_TYPE = "linux.16xlarge.nvidia.gpu"
+            DISTRIBUTED_GPU_RUNNER_TYPE = "linux.8xlarge.nvidia.gpu"
+            NOGPU_RUNNER_TYPE = "linux.2xlarge"
+        elif self.arch == "windows":
+            DISTRIBUTED_GPU_RUNNER_TYPE = self.test_runner_type
+            NOGPU_RUNNER_TYPE = "windows.4xlarge"
+
+        test_jobs = []
+
+        configs: Dict[str, Config] = {}
+        if self.enable_jit_legacy_test:
+            configs["jit_legacy"] = {"num_shards": 1, "runner": self.test_runner_type}
+        if self.enable_multigpu_test:
+            configs["multigpu"] = {"num_shards": 1, "runner": MULTIGPU_RUNNER_TYPE}
+
+        if self.enable_nogpu_no_avx_test:
+            configs["nogpu_NO_AVX"] = {"num_shards": 1, "runner": NOGPU_RUNNER_TYPE}
+        if self.enable_nogpu_no_avx2_test:
+            configs["nogpu_NO_AVX2"] = {"num_shards": 1, "runner": NOGPU_RUNNER_TYPE}
+        if self.enable_force_on_cpu_test:
+            configs["force_on_cpu"] = {"num_shards": 1, "runner": NOGPU_RUNNER_TYPE}
+        if self.enable_distributed_test:
+            configs["distributed"] = {
+                "num_shards": 1,
+                "runner": DISTRIBUTED_GPU_RUNNER_TYPE
+                if "cuda" in str(self.build_environment)
+                else self.test_runner_type,
+            }
+        if self.enable_slow_test:
+            configs["slow"] = {"num_shards": 1, "runner": self.test_runner_type}
+        if self.enable_docs_test:
+            configs["docs_test"] = {"num_shards": 1, "runner": self.test_runner_type}
+        if self.enable_backwards_compat_test:
+            configs["backwards_compat"] = {
+                "num_shards": 1,
+                "runner": self.test_runner_type,
+            }
+        if self.enable_xla_test:
+            configs["xla"] = {"num_shards": 1, "runner": self.test_runner_type}
+        if self.enable_noarch_test:
+            configs["noarch"] = {"num_shards": 1, "runner": self.test_runner_type}
+
+        if self.enable_smoke_test:
+            configs["smoke_tests"] = {"num_shards": 1, "runner": self.test_runner_type}
+
+        for name, config in configs.items():
+            for shard in range(1, config["num_shards"] + 1):
+                test_jobs.append(
+                    {
+                        "id": f"test_{name}_{shard}_{config['num_shards']}",
+                        "name": f"test ({name}, {shard}, {config['num_shards']}, {config['runner']})",
+                        "config": name,
+                        "shard": shard,
+                        "num_shards": config["num_shards"],
+                        "runner": config["runner"],
+                    }
+                )
+
+        if self.enable_default_test:
+            for shard in range(1, self.num_test_shards + 1):
+                test_jobs.append(
+                    {
+                        "id": f"test_default_{shard}_{config['num_shards']}",
+                        "name": f"test (default, {shard}, {self.num_test_shards}, {self.test_runner_type})",
+                        "config": "default",
+                        "shard": shard,
+                        "num_shards": self.num_test_shards,
+                        "runner": self.test_runner_type,
+                    }
+                )
+        return test_jobs
+

 @dataclass
 class DockerWorkflow:
     build_environment: str
@@ -327,17 +396,30 @@ WINDOWS_WORKFLOWS = [
            labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_CPU, LABEL_CIFLOW_WIN}
        ),
    ),
+    CIWorkflow(
+        arch="windows",
+        build_environment="win-vs2019-cuda11.3-py3-smoke",
+        cuda_version="11.3",
+        test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
+        enable_default_test=False,
+        enable_smoke_test=True,
+        enable_force_on_cpu_test=True,
+        only_on_pr=True,
+        ciflow_config=CIFlowConfig(
+            run_on_canary=True,
+            labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_CUDA, LABEL_CIFLOW_WIN}
+        ),
+    ),
    CIWorkflow(
        arch="windows",
        build_environment="win-vs2019-cuda11.3-py3",
        cuda_version="11.3",
        test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
        num_test_shards=2,
-        only_run_smoke_tests_on_pull_request=True,
-        enable_force_on_cpu_test=1,
+        enable_force_on_cpu_test=True,
        ciflow_config=CIFlowConfig(
            run_on_canary=True,
-            labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_CUDA, LABEL_CIFLOW_WIN}
+            labels={LABEL_CIFLOW_CUDA, LABEL_CIFLOW_WIN}
        ),
    ),
    CIWorkflow(
@@ -346,7 +428,7 @@ WINDOWS_WORKFLOWS = [
        cuda_version="11.5",
        test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
        num_test_shards=2,
-        enable_force_on_cpu_test=1,
+        enable_force_on_cpu_test=True,
        is_scheduled="45 4,10,16,22 * * *",
        ciflow_config=CIFlowConfig(
            run_on_canary=True,
@@ -372,9 +454,9 @@ LINUX_WORKFLOWS = [
        build_environment="linux-xenial-py3.7-gcc5.4",
        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.7-gcc5.4",
        test_runner_type=LINUX_CPU_TEST_RUNNER,
-        enable_jit_legacy_test=1,
-        enable_backwards_compat_test=1,
-        enable_docs_test=1,
+        enable_jit_legacy_test=True,
+        enable_backwards_compat_test=True,
+        enable_docs_test=True,
        num_test_shards=2,
        ciflow_config=CIFlowConfig(
            run_on_canary=True,
@@ -475,7 +557,7 @@ LINUX_WORKFLOWS = [
        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang7-asan",
        test_runner_type=LINUX_CPU_TEST_RUNNER,
        num_test_shards=3,
-        distributed_test=False,
+        enable_distributed_test=False,
        ciflow_config=CIFlowConfig(
            labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_SANITIZERS, LABEL_CIFLOW_CPU},
        ),
@@ -486,7 +568,7 @@ LINUX_WORKFLOWS = [
        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang7-onnx",
        test_runner_type=LINUX_CPU_TEST_RUNNER,
        num_test_shards=2,
-        distributed_test=False,
+        enable_distributed_test=False,
        ciflow_config=CIFlowConfig(
            labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_ONNX, LABEL_CIFLOW_CPU},
        ),
@@ -496,11 +578,11 @@ LINUX_WORKFLOWS = [
        build_environment="linux-bionic-cuda10.2-py3.9-gcc7",
        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
        test_runner_type=LINUX_CUDA_TEST_RUNNER,
-        enable_jit_legacy_test=1,
-        enable_multigpu_test=1,
-        enable_nogpu_no_avx_test=1,
-        enable_nogpu_no_avx2_test=1,
-        enable_slow_test=1,
+        enable_jit_legacy_test=True,
+        enable_multigpu_test=True,
+        enable_nogpu_no_avx_test=True,
+        enable_nogpu_no_avx2_test=True,
+        enable_slow_test=True,
        num_test_shards=2,
        ciflow_config=CIFlowConfig(
            run_on_canary=True,
@@ -623,8 +705,8 @@ LINUX_WORKFLOWS = [
        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.7-clang9",
        test_runner_type=LINUX_CPU_TEST_RUNNER,
        num_test_shards=2,
-        distributed_test=False,
-        enable_noarch_test=1,
+        enable_distributed_test=False,
+        enable_noarch_test=True,
        ciflow_config=CIFlowConfig(
            labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU, LABEL_CIFLOW_NOARCH},
        ),
@@ -635,7 +717,7 @@ LINUX_WORKFLOWS = [
        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.7-clang9",
        test_runner_type=LINUX_CPU_TEST_RUNNER,
        num_test_shards=1,
-        distributed_test=False,
+        enable_distributed_test=False,
        ciflow_config=CIFlowConfig(
            labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU, LABEL_CIFLOW_VULKAN},
        ),
@@ -646,7 +728,7 @@ LINUX_WORKFLOWS = [
        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
        test_runner_type=LINUX_CUDA_TEST_RUNNER,
        num_test_shards=2,
-        distributed_test=False,
+        enable_distributed_test=False,
        timeout_after=360,
        # Only run this on master 4 times per day since it does take a while
        is_scheduled="0 */4 * * *",
@@ -663,8 +745,9 @@ XLA_WORKFLOWS = [
        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/xla_base",
        test_runner_type=LINUX_CPU_TEST_RUNNER,
        num_test_shards=2,
-        distributed_test=False,
-        enable_xla_test=1,
+        enable_distributed_test=False,
+        enable_xla_test=True,
+        enable_default_test=False,
        ciflow_config=CIFlowConfig(
            labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU, LABEL_CIFLOW_XLA},
        ),
@@ -801,7 +884,7 @@ MACOS_WORKFLOWS = [
        xcode_version="12.4",
        test_runner_type=MACOS_TEST_RUNNER_11,
        num_test_shards=2,
-        distributed_test=False,
+        enable_distributed_test=False,
        ciflow_config=CIFlowConfig(
            labels={LABEL_CIFLOW_MACOS},
        ),
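For reference, each entry of `CIWorkflow.test_jobs` built by `_gen_test_jobs` above is a plain dict that the jinja templates interpolate via `!{{ test_job.* }}`; for example (values taken from the generated linux-bionic-py3.7-clang9 workflow later in this diff):

```python
# One test_jobs entry; the templates turn this into a standalone job.
{
    "id": "test_noarch_1_1",
    "name": "test (noarch, 1, 1, linux.2xlarge)",
    "config": "noarch",
    "shard": 1,
    "num_shards": 1,
    "runner": "linux.2xlarge",
}
```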
.github/scripts/generate_pytorch_test_matrix.py (vendored): 122 lines deleted
@@ -1,122 +0,0 @@
-#!/usr/bin/env python3
-
-"""Generates a matrix to be utilized through github actions
-
-Will output a matrix to represent our testing configurations, which is currently
-dictated by just sharding.
-
-"""
-
-import json
-import os
-from typing import Dict
-
-from typing_extensions import TypedDict
-
-
-BUILD_ENVIRONMENT = os.getenv('BUILD_ENVIRONMENT')
-assert BUILD_ENVIRONMENT is not None
-
-class Config(TypedDict):
-    num_shards: int
-    runner: str
-
-
-# When the user specifies labels that are NOT ciflow/default, the expectation is
-# that the workflows should be triggered as if they are on trunk. For example, when
-# ciflow/all is specified, we should run the full test suite for Windows CUDA
-# and NOT only the smoke tests.
-def run_as_if_on_trunk() -> bool:
-    ON_PULL_REQUEST = os.getenv('GITHUB_HEAD_REF')
-    if not ON_PULL_REQUEST:
-        return True
-
-    from pathlib import Path
-    GITHUB_DIR = Path(__file__).resolve().parent.parent
-
-    with open(f'{GITHUB_DIR}/generated-ciflow-ruleset.json') as f:
-        labels_to_workflows = json.load(f)['label_rules']
-
-    pr_labels = json.loads(os.getenv('PR_LABELS', '[]'))
-    current_workflow_triggered_by_label = False
-    for label in pr_labels:
-        if label != 'ciflow/default' and label in labels_to_workflows:
-            workflows_triggered_by_label = labels_to_workflows[label]
-            if any([BUILD_ENVIRONMENT in workflow for workflow in workflows_triggered_by_label]):
-                current_workflow_triggered_by_label = True
-                break
-
-    return current_workflow_triggered_by_label
-
-def main() -> None:
-    INCLUDE_DEFAULT_TEST = True
-    TEST_RUNNER_TYPE = os.getenv('TEST_RUNNER_TYPE')
-    assert TEST_RUNNER_TYPE is not None
-    RUN_SMOKE_TESTS_ONLY_ON_PR = os.getenv('RUN_SMOKE_TESTS_ONLY_ON_PR')
-    RUN_SMOKE_TESTS = RUN_SMOKE_TESTS_ONLY_ON_PR == "true" and not run_as_if_on_trunk()
-    NUM_TEST_SHARDS_ON_PULL_REQUEST = os.getenv('NUM_TEST_SHARDS_ON_PULL_REQUEST')
-    NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '0'))
-    if not run_as_if_on_trunk() and NUM_TEST_SHARDS_ON_PULL_REQUEST:
-        NUM_TEST_SHARDS = int(NUM_TEST_SHARDS_ON_PULL_REQUEST)
-    MULTIGPU_RUNNER_TYPE = os.getenv('MULTIGPU_RUNNER_TYPE')
-    DISTRIBUTED_GPU_RUNNER_TYPE = os.getenv('DISTRIBUTED_GPU_RUNNER_TYPE', TEST_RUNNER_TYPE)
-    NOGPU_RUNNER_TYPE = os.getenv('NOGPU_RUNNER_TYPE')
-    configs: Dict[str, Config] = {}
-    if os.getenv('ENABLE_JIT_LEGACY_TEST'):
-        configs['jit_legacy'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
-    if MULTIGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_MULTIGPU_TEST'):
-        configs['multigpu'] = {'num_shards': 1, 'runner': MULTIGPU_RUNNER_TYPE}
-    if NOGPU_RUNNER_TYPE is not None:
-        if os.getenv('ENABLE_NOGPU_NO_AVX_TEST'):
-            configs['nogpu_NO_AVX'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
-        if os.getenv('ENABLE_NOGPU_NO_AVX2_TEST'):
-            configs['nogpu_NO_AVX2'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
-        if os.getenv('ENABLE_FORCE_ON_CPU_TEST'):
-            configs['force_on_cpu'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
-    if os.getenv('ENABLE_DISTRIBUTED_TEST'):
-        configs['distributed'] = {
-            'num_shards': 1,
-            'runner': DISTRIBUTED_GPU_RUNNER_TYPE if "cuda" in str(BUILD_ENVIRONMENT) else TEST_RUNNER_TYPE
-        }
-    if os.getenv('ENABLE_SLOW_TEST'):
-        configs['slow'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
-    if os.getenv('ENABLE_DOCS_TEST'):
-        configs['docs_test'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
-    if os.getenv('ENABLE_BACKWARDS_COMPAT_TEST'):
-        configs['backwards_compat'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
-    if os.getenv('ENABLE_XLA_TEST'):
-        configs['xla'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
-        INCLUDE_DEFAULT_TEST = False
-    if os.getenv('ENABLE_NOARCH_TEST'):
-        configs['noarch'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
-    if RUN_SMOKE_TESTS:
-        configs['smoke_tests'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
-    matrix = {
-        'include': [
-            {
-                'config': 'default',
-                'shard': shard,
-                'num_shards': NUM_TEST_SHARDS,
-                'runner': TEST_RUNNER_TYPE,
-            }
-            for shard in range(1, NUM_TEST_SHARDS + 1)
-            if INCLUDE_DEFAULT_TEST
-        ] + [
-            {
-                'config': name,
-                'shard': shard,
-                'num_shards': config['num_shards'],
-                'runner': config['runner'],
-            }
-            for name, config in configs.items()
-            for shard in range(1, config['num_shards'] + 1)
-        ]
-    }
-    render_matrix = {'config': list(dict.fromkeys(x['config'] for x in matrix['include']))}
-    print(json.dumps({'matrix': matrix, 'render-matrix': render_matrix}, indent=2))
-    print(f'::set-output name=matrix::{json.dumps(matrix)}')
-    print(f'::set-output name=render-matrix::{json.dumps(render_matrix)}')
-
-
-if __name__ == "__main__":
-    main()
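For context, the deleted script's whole job was to print a matrix like the following to its step outputs at CI time, which the old `test` job then consumed through `fromJson`; shard counts and runner below are illustrative, not taken from a real run:

```python
# Illustrative reconstruction of the deleted script's output format.
import json

matrix = {
    "include": [
        {"config": "default", "shard": 1, "num_shards": 2, "runner": "linux.2xlarge"},
        {"config": "default", "shard": 2, "num_shards": 2, "runner": "linux.2xlarge"},
        {"config": "noarch", "shard": 1, "num_shards": 1, "runner": "linux.2xlarge"},
    ]
}
render_matrix = {"config": ["default", "noarch"]}
print(f"::set-output name=matrix::{json.dumps(matrix)}")
print(f"::set-output name=render-matrix::{json.dumps(render_matrix)}")
```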
.github/templates/common.yml.j2 (vendored): 9 lines changed
@@ -219,13 +219,12 @@ concurrency:
 {%- endif %}
 {%- endmacro -%}

-{%- macro upload_downloaded_files(name, artifact_name="", use_s3=True, when="always()") -%}
+{%- macro upload_downloaded_files(name, config=None, shard=None, num_shards=None, runner=None, artifact_name="", use_s3=True, when="always()") -%}
   - name: Zip JSONs for upload
     if: !{{ when }}
     env:
 {%- if name == 'linux' or name == 'windows' or name == 'macos' %}
-      FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
-{%- else %}
+      FILE_SUFFIX: '${{ github.job }}-!{{ config }}-!{{ shard }}-!{{ num_shards }}-!{{ runner }}'{%- else %}
       FILE_SUFFIX: '!{{ name }}-${{ github.job }}'
 {%- endif %}
 {%- if name == 'windows' %}
@@ -257,12 +256,12 @@ concurrency:
         test-jsons-*.zip
 {%- endmacro -%}

-{%- macro upload_test_reports(name, artifact_name="", use_s3=True) -%}
+{%- macro upload_test_reports(name, config=None, shard=None, num_shards=None, runner=None, artifact_name="", use_s3=True) -%}
   - name: Zip test reports for upload
     if: always()
     env:
 {%- if name == 'linux' or name == 'windows' or name == 'macos' %}
-      FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+      FILE_SUFFIX: '${{ github.job }}-!{{ config }}-!{{ shard }}-!{{ num_shards }}-!{{ runner }}'
 {%- else %}
       FILE_SUFFIX: '!{{ name }}-${{ github.job }}'
 {%- endif %}
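The practical effect of threading `config`, `shard`, `num_shards`, and `runner` through these macros is that the artifact suffix is fixed while rendering instead of being looked up from the matrix at run time; a small sketch (illustrative values, not from a real run):

```python
# Illustrative: the suffix the macro bakes in at render time versus the
# runtime matrix lookup it replaces.
config, shard, num_shards, runner = "noarch", 1, 1, "linux.2xlarge"
old = "${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}"
new = f"${{{{ github.job }}}}-{config}-{shard}-{num_shards}-{runner}"
print(new)  # ${{ github.job }}-noarch-1-1-linux.2xlarge
```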
.github/templates/linux_ci_workflow.yml.j2 (vendored): 64 lines changed
@@ -176,53 +176,18 @@ jobs:
 {%- endblock %}
 {%- if not exclude_test %}
 {% block test +%}
-  generate-test-matrix:
+{%- for test_job in test_jobs %}
+  !{{ test_job.id }}:
+    name: !{{ test_job.name }}
     needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: !{{ common.timeout_minutes }}
-    env:
-      TEST_RUNNER_TYPE: !{{ test_runner_type }}
-      ENABLE_DISTRIBUTED_TEST: !{{ enable_distributed_test }}
-      ENABLE_JIT_LEGACY_TEST: !{{ enable_jit_legacy_test }}
-      ENABLE_MULTIGPU_TEST: !{{ enable_multigpu_test }}
-      ENABLE_NOGPU_NO_AVX_TEST: !{{ enable_nogpu_no_avx_test }}
-      ENABLE_NOGPU_NO_AVX2_TEST: !{{ enable_nogpu_no_avx2_test }}
-      ENABLE_SLOW_TEST: !{{ enable_slow_test }}
-      ENABLE_DOCS_TEST: !{{ enable_docs_test }}
-      ENABLE_BACKWARDS_COMPAT_TEST: !{{ enable_backwards_compat_test }}
-      ENABLE_XLA_TEST: !{{ enable_xla_test }}
-      ENABLE_NOARCH_TEST: !{{ enable_noarch_test }}
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-      MULTIGPU_RUNNER_TYPE: !{{ multigpu_runner_type }}
-      DISTRIBUTED_GPU_RUNNER_TYPE: !{{ distributed_gpu_runner_type }}
-      NOGPU_RUNNER_TYPE: linux.2xlarge
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-    container:
-      image: python:3.9
-    steps:
-      - name: Install dependencies
-        run: pip install typing-extensions==3.10
-      - name: Clone pytorch/pytorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-      - name: Generating test matrix
-        id: set-matrix
-        run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
+    runs-on: !{{ test_job.runner }}
     timeout-minutes: !{{ common.timeout_minutes }}
     env:
       DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
       JOB_BASE_NAME: !{{ build_environment }}-test
-      TEST_CONFIG: ${{ matrix.config }}
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
+      TEST_CONFIG: !{{ test_job.config }}
+      SHARD_NUMBER: !{{ test_job.shard }}
+      NUM_TEST_SHARDS: !{{ test_job.num_shards }}
       PR_BODY: ${{ github.event.pull_request.body }}
     steps:
 {%- if 'rocm' in test_runner_type %}
@@ -235,14 +200,12 @@ jobs:
       run: |
         !{{ common.add_retry_to_env() }}
         retry docker pull "${DOCKER_IMAGE}"
-{%- if 'rocm' in test_runner_type %}
+{%- if 'rocm' in test_runner_type and "nogpu" not in test_job.config %}
     - name: ROCm set GPU_FLAG
-      if: ${{ contains(env.BUILD_ENVIRONMENT, 'rocm') && !contains(matrix.config, 'nogpu') }}
       run: |
         echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
-{%- else %}
+{%- elif "cuda" in build_environment and "nogpu" not in test_job.config %}
     - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-      if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
       run: |
         bash .github/scripts/install_nvidia_utils_linux.sh
         echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
@@ -365,11 +328,11 @@ jobs:
 {%- endif %}
     !{{ common.render_test_results() }}
 {%- if 'rocm' in test_runner_type %}
-    !{{ common.upload_downloaded_files(name='linux', use_s3=False) }}
-    !{{ common.upload_test_reports(name='linux', artifact_name="test-reports", use_s3=False) }}
+    !{{ common.upload_downloaded_files(name='linux', use_s3=False, config=test_job.config, shard=test_job.shard, num_shards=test_job.num_shards, runner=test_job.runner) }}
+    !{{ common.upload_test_reports(name='linux', artifact_name="test-reports", use_s3=False, config=test_job.config, shard=test_job.shard, num_shards=test_job.num_shards, runner=test_job.runner) }}
 {%- else %}
-    !{{ common.upload_downloaded_files(name='linux') }}
-    !{{ common.upload_test_reports(name='linux') }}
+    !{{ common.upload_downloaded_files(name='linux', config=test_job.config, shard=test_job.shard, num_shards=test_job.num_shards, runner=test_job.runner) }}
+    !{{ common.upload_test_reports(name='linux', config=test_job.config, shard=test_job.shard, num_shards=test_job.num_shards, runner=test_job.runner) }}
 {%- endif %}
     !{{ common.upload_test_statistics(build_environment) }}
 {%- if 'rocm' in test_runner_type %}
@@ -377,6 +340,7 @@
 {%- else %}
     !{{ common.teardown_ec2_linux() }}
 {%- endif %}
+{%- endfor %}
 {% endblock %}
 {%- endif -%}
 {%- if enable_doc_jobs %}
.github/templates/macos_ci_workflow.yml.j2 (vendored): 42 lines changed
@@ -87,40 +87,17 @@ jobs:
 {% endblock +%}
 {%- if not exclude_test %}
 {% block test +%}
-  generate-test-matrix:
+{%- for test_job in test_jobs %}
+  !{{ test_job.id }}:
+    name: !{{ test_job.name }}
     needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: !{{ common.timeout_minutes }}
-    env:
-      TEST_RUNNER_TYPE: !{{ test_runner_type }}
-      ENABLE_DISTRIBUTED_TEST: !{{ enable_distributed_test }}
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-    container:
-      image: python:3.9
-    steps:
-      - name: Install dependencies
-        run: pip install typing-extensions==3.10
-      - name: Clone pytorch/pytorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-      - name: Generating test matrix
-        id: set-matrix
-        run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
+    runs-on: !{{ test_job.runner }}
     timeout-minutes: !{{ common.timeout_minutes }}
     env:
       JOB_BASE_NAME: !{{ build_environment }}-test
-      TEST_CONFIG: ${{ matrix.config }}
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
+      TEST_CONFIG: !{{ test_job.config }}
+      SHARD_NUMBER: !{{ test_job.shard }}
+      NUM_TEST_SHARDS: !{{ test_job.num_shards }}
       PR_BODY: ${{ github.event.pull_request.body }}
     steps:
       !{{ common.checkout(submodules="false") }}
@@ -143,9 +120,10 @@ jobs:
           python3 -mpip install dist/*.whl
           .jenkins/pytorch/macos-test.sh
     !{{ common.render_test_results() }}
-    !{{ common.upload_downloaded_files(name='macos', artifact_name="test-jsons", use_s3=False) }}
-    !{{ common.upload_test_reports("macos", artifact_name="test-reports", use_s3=False) }}
+    !{{ common.upload_downloaded_files(name='macos', config=test_job.config, shard=test_job.shard, num_shards=test_job.num_shards, runner=test_job.runner, artifact_name="test-jsons", use_s3=False) }}
+    !{{ common.upload_test_reports("macos", config=test_job.config, shard=test_job.shard, num_shards=test_job.num_shards, runner=test_job.runner, artifact_name="test-reports", use_s3=False) }}
     !{{ common.upload_test_statistics(build_environment, needs_credentials=True) }}
+{%- endfor %}
 {% endblock +%}
 {%- endif %}

.github/templates/windows_ci_workflow.yml.j2 (vendored): 56 lines changed
@@ -31,11 +31,12 @@ on:
       - '!{{ label }}/*'
 {%- endif %}
 {%- endfor %}
-{%- if not is_scheduled %}
+{%- if not is_scheduled and not only_on_pr %}
     branches:
       - master
      - release/*
-{%- else %}
+{%- endif %}
+{%- if is_scheduled and not only_on_pr %}
   schedule:
     - cron: !{{ is_scheduled }}
 {%- endif %}
@@ -130,46 +131,20 @@ jobs:
         rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
         rm -rf ./*

-  generate-test-matrix:
-    needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: !{{ common.timeout_minutes }}
-    env:
-      TEST_RUNNER_TYPE: !{{ test_runner_type }}
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-      NUM_TEST_SHARDS_ON_PULL_REQUEST: !{{ num_test_shards_on_pull_request }}
-      NOGPU_RUNNER_TYPE: windows.4xlarge
-      ENABLE_FORCE_ON_CPU_TEST: !{{ enable_force_on_cpu_test }}
-      RUN_SMOKE_TESTS_ONLY_ON_PR: !{{ only_run_smoke_tests_on_pull_request }}
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-    container:
-      image: python:3.9
-    steps:
-      - name: Install dependencies
-        run: pip install typing-extensions==3.10
-      - name: Clone pytorch/pytorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-      - name: Generating test matrix
-        id: set-matrix
-        run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
+{%- for test_job in test_jobs %}
+  !{{ test_job.id }}:
+    name: !{{ test_job.name }}
     timeout-minutes: !{{ common.timeout_minutes }}
     env:
       JOB_BASE_NAME: !{{ build_environment }}-test
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
-      TEST_CONFIG: ${{ matrix.config }}
+      SHARD_NUMBER: !{{ test_job.shard }}
+      NUM_TEST_SHARDS: !{{ test_job.num_shards }}
+      TEST_CONFIG: !{{ test_job.config }}
       http_proxy: "!{{ common.squid_proxy }}"
       https_proxy: "!{{ common.squid_proxy }}"
       PR_BODY: ${{ github.event.pull_request.body }}
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
+    needs: build
+    runs-on: !{{ test_job.runner }}
     steps:
       !{{ common.display_ec2_information() }}
       - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
@@ -181,14 +156,12 @@ jobs:
        shell: powershell
        run: |
          .\.circleci\scripts\vs_install.ps1
-{%- if cuda_version != "cpu" %}
+{%- if cuda_version != "cpu" and not test_job.config == 'force_on_cpu' %}
     - name: Install Cuda
-      if: ${{ matrix.config != 'force_on_cpu' }}
       shell: bash
       run: |
         .circleci/scripts/windows_cuda_install.sh
     - name: Install Cudnn
-      if: ${{ matrix.config != 'force_on_cpu' }}
       shell: bash
       run: |
         .circleci/scripts/windows_cudnn_install.sh
|
|||
timeout-minutes: 210
|
||||
run: |
|
||||
.jenkins/pytorch/win-test.sh
|
||||
!{{ common.upload_downloaded_files(name='windows') }}
|
||||
!{{ common.upload_test_reports(name='windows') }}
|
||||
!{{ common.upload_downloaded_files(name='windows', config=test_job.config, shard=test_job.shard, num_shards=test_job.num_shards, runner=test_job.runner) }}
|
||||
!{{ common.upload_test_reports(name='windows', config=test_job.config, shard=test_job.shard, num_shards=test_job.num_shards, runner=test_job.runner) }}
|
||||
!{{ common.render_test_results() }}
|
||||
!{{ common.wait_and_kill_ssh_windows() }}
|
||||
!{{ common.parse_ref() }}
|
||||
|
|
@@ -227,3 +200,4 @@ jobs:
       # Should remove the entirety of pytorch-${{ github.run_id }}
       run: |
         rm -rf ./*
+{%- endfor %}
.github/workflows/generated-linux-bionic-cuda10.2-py3.9-gcc7.yml (generated, vendored): 2061 lines changed
(File diff suppressed because it is too large.)
.github/workflows/generated-linux-bionic-py3.7-clang9.yml (generated, vendored): 801 lines changed
@@ -250,53 +250,17 @@ jobs:
           # Prune all of the docker images
           docker system prune -af

-  generate-test-matrix:
+  test_noarch_1_1:
+    name: test (noarch, 1, 1, linux.2xlarge)
     needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: 240
-    env:
-      TEST_RUNNER_TYPE: linux.2xlarge
-      ENABLE_DISTRIBUTED_TEST: ''
-      ENABLE_JIT_LEGACY_TEST: ''
-      ENABLE_MULTIGPU_TEST: ''
-      ENABLE_NOGPU_NO_AVX_TEST: ''
-      ENABLE_NOGPU_NO_AVX2_TEST: ''
-      ENABLE_SLOW_TEST: ''
-      ENABLE_DOCS_TEST: ''
-      ENABLE_BACKWARDS_COMPAT_TEST: ''
-      ENABLE_XLA_TEST: ''
-      ENABLE_NOARCH_TEST: 1
-      NUM_TEST_SHARDS: 2
-      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
-      DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
-      NOGPU_RUNNER_TYPE: linux.2xlarge
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-    container:
-      image: python:3.9
-    steps:
-      - name: Install dependencies
-        run: pip install typing-extensions==3.10
-      - name: Clone pytorch/pytorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-      - name: Generating test matrix
-        id: set-matrix
-        run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
+    runs-on: linux.2xlarge
     timeout-minutes: 240
     env:
       DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
       JOB_BASE_NAME: linux-bionic-py3.7-clang9-test
-      TEST_CONFIG: ${{ matrix.config }}
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
+      TEST_CONFIG: noarch
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 1
       PR_BODY: ${{ github.event.pull_request.body }}
     steps:
       - name: Display EC2 information
@@ -359,11 +323,6 @@ jobs:
             "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
           }
           retry docker pull "${DOCKER_IMAGE}"
-      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
-        run: |
-          bash .github/scripts/install_nvidia_utils_linux.sh
-          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
       - name: Determine shm-size
         run: |
           shm_size="1g"
|
|||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
|
||||
FILE_SUFFIX: '${{ github.job }}-noarch-1-1-linux.2xlarge'
|
||||
run: |
|
||||
# Remove any previous test jsons if they exist
|
||||
rm -f test-jsons-*.zip
|
||||
|
|
@@ -492,7 +451,751 @@ jobs:
       - name: Zip test reports for upload
         if: always()
         env:
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+          FILE_SUFFIX: '${{ github.job }}-noarch-1-1-linux.2xlarge'
         run: |
           # Remove any previous test reports if they exist
           rm -f test-reports-*.zip
+          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: linux-bionic-py3.7-clang9-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  test_smoke_tests_1_1:
+    name: test (smoke_tests, 1, 1, linux.2xlarge)
+    needs: build
+    runs-on: linux.2xlarge
+    timeout-minutes: 240
+    env:
+      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      JOB_BASE_NAME: linux-bionic-py3.7-clang9-test
+      TEST_CONFIG: smoke_tests
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 1
+      PR_BODY: ${{ github.event.pull_request.body }}
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Pull Docker image
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Determine shm-size
+        run: |
+          shm_size="1g"
+          case "${BUILD_ENVIRONMENT}" in
+            *cuda*)
+              shm_size="2g"
+              ;;
+            *rocm*)
+              shm_size="8g"
+              ;;
+          esac
+          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+      - name: Unzip artifacts
+        run: |
+          unzip -o artifacts.zip
+      - name: Output disk space left
+        run: |
+          sudo df -H
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Test
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+        # Time out the test phase after 240 minutes
+        timeout-minutes: 240
+        run: |
+          set -x
+
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
+          else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          PROXY_ENV=
+          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
+          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
+          # make it so that we shouldn't have to fully disable squid for XLA tests
+          if [[ $TEST_CONFIG != 'xla' ]]; then
+            # shellcheck disable=SC2089
+            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
+          fi
+          # detached container should get cleaned up by teardown_ec2_linux
+          # TODO: Stop building test binaries as part of the build phase
+          # Used for GPU_FLAG since that doesn't play nice
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BUILD_ENVIRONMENT \
+            -e PR_NUMBER \
+            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
+            -e GITHUB_ACTIONS \
+            -e IN_CI \
+            -e IS_GHA \
+            -e BRANCH \
+            -e SHA1 \
+            -e AWS_DEFAULT_REGION \
+            -e IN_WHEEL_TEST \
+            -e SHARD_NUMBER \
+            -e JOB_BASE_NAME \
+            -e TEST_CONFIG \
+            -e NUM_TEST_SHARDS \
+            -e PR_BODY \
+            -e PYTORCH_RETRY_TEST_CASES \
+            -e PR_LABELS \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            ${PROXY_ENV} \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --ipc=host \
+            --shm-size="${SHM_SIZE}" \
+            --tty \
+            --detach \
+            --name="${container_name}" \
+            --user jenkins \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
+        run: |
+          # Remove any previous test jsons if they exist
+          rm -f test-jsons-*.zip
+          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
+        run: |
+          # Remove any previous test reports if they exist
+          rm -f test-reports-*.zip
+          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: linux-bionic-py3.7-clang9-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  test_default_1_1:
+    name: test (default, 1, 2, linux.2xlarge)
+    needs: build
+    runs-on: linux.2xlarge
+    timeout-minutes: 240
+    env:
+      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      JOB_BASE_NAME: linux-bionic-py3.7-clang9-test
+      TEST_CONFIG: default
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 2
+      PR_BODY: ${{ github.event.pull_request.body }}
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Pull Docker image
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Determine shm-size
+        run: |
+          shm_size="1g"
+          case "${BUILD_ENVIRONMENT}" in
+            *cuda*)
+              shm_size="2g"
+              ;;
+            *rocm*)
+              shm_size="8g"
+              ;;
+          esac
+          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+      - name: Unzip artifacts
+        run: |
+          unzip -o artifacts.zip
+      - name: Output disk space left
+        run: |
+          sudo df -H
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Test
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+        # Time out the test phase after 240 minutes
+        timeout-minutes: 240
+        run: |
+          set -x
+
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
+          else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          PROXY_ENV=
+          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
+          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
+          # make it so that we shouldn't have to fully disable squid for XLA tests
+          if [[ $TEST_CONFIG != 'xla' ]]; then
+            # shellcheck disable=SC2089
+            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
+          fi
+          # detached container should get cleaned up by teardown_ec2_linux
+          # TODO: Stop building test binaries as part of the build phase
+          # Used for GPU_FLAG since that doesn't play nice
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BUILD_ENVIRONMENT \
+            -e PR_NUMBER \
+            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
+            -e GITHUB_ACTIONS \
+            -e IN_CI \
+            -e IS_GHA \
+            -e BRANCH \
+            -e SHA1 \
+            -e AWS_DEFAULT_REGION \
+            -e IN_WHEEL_TEST \
+            -e SHARD_NUMBER \
+            -e JOB_BASE_NAME \
+            -e TEST_CONFIG \
+            -e NUM_TEST_SHARDS \
+            -e PR_BODY \
+            -e PYTORCH_RETRY_TEST_CASES \
+            -e PR_LABELS \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            ${PROXY_ENV} \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --ipc=host \
+            --shm-size="${SHM_SIZE}" \
+            --tty \
+            --detach \
+            --name="${container_name}" \
+            --user jenkins \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.2xlarge'
+        run: |
+          # Remove any previous test jsons if they exist
+          rm -f test-jsons-*.zip
+          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.2xlarge'
+        run: |
+          # Remove any previous test reports if they exist
+          rm -f test-reports-*.zip
+          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: linux-bionic-py3.7-clang9-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
if: always()
|
||||
run: |
|
||||
# ignore expansion of "docker ps -q" since it could be empty
|
||||
# shellcheck disable=SC2046
|
||||
docker stop $(docker ps -q) || true
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
test_default_2_1:
|
||||
name: test (default, 2, 2, linux.2xlarge)
|
||||
needs: build
|
||||
runs-on: linux.2xlarge
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
|
||||
JOB_BASE_NAME: linux-bionic-py3.7-clang9-test
|
||||
TEST_CONFIG: default
|
||||
SHARD_NUMBER: 2
|
||||
NUM_TEST_SHARDS: 2
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: Log in to ECR
|
||||
env:
|
||||
AWS_RETRY_MODE: standard
|
||||
AWS_MAX_ATTEMPTS: 5
|
||||
run: |
|
||||
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
|
||||
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
|
||||
- name: Chown workspace
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${ALPINE_IMAGE}"
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Clean workspace
|
||||
run: |
|
||||
rm -rf "${GITHUB_WORKSPACE}"
|
||||
mkdir "${GITHUB_WORKSPACE}"
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Preserve github env variables for use in docker
|
||||
run: |
|
||||
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
- name: Pull Docker image
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${DOCKER_IMAGE}"
|
||||
- name: Determine shm-size
|
||||
run: |
|
||||
shm_size="1g"
|
||||
case "${BUILD_ENVIRONMENT}" in
|
||||
*cuda*)
|
||||
shm_size="2g"
|
||||
;;
|
||||
*rocm*)
|
||||
shm_size="8g"
|
||||
;;
|
||||
esac
|
||||
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
|
||||
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
|
||||
name: Download PyTorch Build Artifacts
|
||||
with:
|
||||
name: ${{ env.BUILD_ENVIRONMENT }}
|
||||
- name: Unzip artifacts
|
||||
run: |
|
||||
unzip -o artifacts.zip
|
||||
- name: Output disk space left
|
||||
run: |
|
||||
sudo df -H
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Test
|
||||
env:
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
# Time out the test phase after 240 minutes
|
||||
timeout-minutes: 240
|
||||
run: |
|
||||
set -x
|
||||
|
||||
if [[ $TEST_CONFIG == 'multigpu' ]]; then
|
||||
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
|
||||
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
|
||||
TEST_COMMAND=.jenkins/caffe2/test.sh
|
||||
else
|
||||
TEST_COMMAND=.jenkins/pytorch/test.sh
|
||||
fi
|
||||
PROXY_ENV=
|
||||
# NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
|
||||
# We should investigate whether or not there's a list of hostnames we can add to no_proxy to
|
||||
# make it so that we shouldn't have to fully disable squid for XLA tests
|
||||
if [[ $TEST_CONFIG != 'xla' ]]; then
|
||||
# shellcheck disable=SC2089
|
||||
PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
|
||||
fi
|
||||
# detached container should get cleaned up by teardown_ec2_linux
|
||||
# TODO: Stop building test binaries as part of the build phase
|
||||
# Used for GPU_FLAG since that doesn't play nice
|
||||
# shellcheck disable=SC2086,SC2090
|
||||
container_name=$(docker run \
|
||||
${GPU_FLAG:-} \
|
||||
-e BUILD_ENVIRONMENT \
|
||||
-e PR_NUMBER \
|
||||
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
|
||||
-e GITHUB_ACTIONS \
|
||||
-e IN_CI \
|
||||
-e IS_GHA \
|
||||
-e BRANCH \
|
||||
-e SHA1 \
|
||||
-e AWS_DEFAULT_REGION \
|
||||
-e IN_WHEEL_TEST \
|
||||
-e SHARD_NUMBER \
|
||||
-e JOB_BASE_NAME \
|
||||
-e TEST_CONFIG \
|
||||
-e NUM_TEST_SHARDS \
|
||||
-e PR_BODY \
|
||||
-e PYTORCH_RETRY_TEST_CASES \
|
||||
-e PR_LABELS \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e SCCACHE_BUCKET \
|
||||
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
|
||||
${PROXY_ENV} \
|
||||
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
|
||||
--ulimit stack=10485760:83886080 \
|
||||
--security-opt seccomp=unconfined \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--ipc=host \
|
||||
--shm-size="${SHM_SIZE}" \
|
||||
--tty \
|
||||
--detach \
|
||||
--name="${container_name}" \
|
||||
--user jenkins \
|
||||
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
|
||||
-w /var/lib/jenkins/workspace \
|
||||
"${DOCKER_IMAGE}"
|
||||
)
|
||||
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
|
||||
- name: Chown workspace
|
||||
if: always()
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Install render_test_results dependencies
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install junitparser==2.1.1 rich==10.9.0
|
||||
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
|
||||
if: always()
|
||||
shell: bash
|
||||
# Encoding is weird on windows, just try to default to utf-8 if possible
|
||||
env:
|
||||
PYTHONIOENCODING: "utf-8"
|
||||
run: |
|
||||
python3 tools/render_junit.py test/
|
||||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.2xlarge'
|
||||
run: |
|
||||
# Remove any previous test jsons if they exist
|
||||
rm -f test-jsons-*.zip
|
||||
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Downloaded JSONs on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: warn
|
||||
path:
|
||||
test-jsons-*.zip
|
||||
- name: Zip test reports for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.2xlarge'
|
||||
run: |
|
||||
# Remove any previous test reports if they exist
|
||||
rm -f test-reports-*.zip
|
||||

725
.github/workflows/generated-linux-bionic-rocm4.5-py3.7.yml
generated
vendored

@@ -249,53 +249,17 @@ jobs:
# Prune all of the docker images
docker system prune -af

generate-test-matrix:
test_distributed_1_1:
name: test (distributed, 1, 1, linux.rocm.gpu)
needs: build
runs-on: ubuntu-18.04
timeout-minutes: 240
env:
TEST_RUNNER_TYPE: linux.rocm.gpu
ENABLE_DISTRIBUTED_TEST: 1
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.rocm.gpu
DISTRIBUTED_GPU_RUNNER_TYPE: linux.rocm.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py

test:
needs: [build, generate-test-matrix]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
runs-on: linux.rocm.gpu
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-bionic-rocm4.5-py3.7-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: distributed
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Clean workspace

@@ -352,7 +316,6 @@ jobs:
}
retry docker pull "${DOCKER_IMAGE}"
- name: ROCm set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'rocm') && !contains(matrix.config, 'nogpu') }}
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Determine shm-size

@@ -455,7 +418,7 @@ jobs:
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.rocm.gpu'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip

@@ -471,7 +434,679 @@ jobs:
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.rocm.gpu'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports on Github
if: always()
with:
name: test-reports
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-bionic-rocm4.5-py3.7-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
test_smoke_tests_1_1:
name: test (smoke_tests, 1, 1, linux.rocm.gpu)
needs: build
runs-on: linux.rocm.gpu
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-bionic-rocm4.5-py3.7-test
TEST_CONFIG: smoke_tests
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: Set DOCKER_HOST
run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}"
- name: Runner health check system info
if: always()
run: |
cat /etc/os-release || true
cat /etc/apt/sources.list.d/rocm.list || true
cat /opt/rocm/.info/version || true
whoami
- name: Runner health check rocm-smi
if: always()
run: |
rocm-smi
- name: Runner health check rocminfo
if: always()
run: |
rocminfo
- name: Runner health check GPU count
if: always()
run: |
ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx')
if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then
echo "Failed to detect GPUs on the runner"
exit 1
fi
- name: Runner health check disconnect on failure
if: ${{ failure() }}
run: |
killall runsvc.sh
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
# Time out the test phase after 240 minutes
timeout-minutes: 240
run: |
set -x

if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.jenkins/caffe2/test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PR_BODY \
-e PYTORCH_RETRY_TEST_CASES \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
# jenkins user does not have write permission to mounted workspace; work-around by copying within container to jenkins home
docker exec -t "${container_name}" sh -c "cd .. && cp -R workspace pytorch && cd pytorch && pip install dist/*.whl && ${TEST_COMMAND}"
# copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
docker exec -t "${container_name}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.rocm.gpu'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: actions/upload-artifact@v2
name: Store Test Downloaded JSONs on Github
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.rocm.gpu'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports on Github
if: always()
with:
name: test-reports
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-bionic-rocm4.5-py3.7-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
test_default_1_1:
name: test (default, 1, 2, linux.rocm.gpu)
needs: build
runs-on: linux.rocm.gpu
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-bionic-rocm4.5-py3.7-test
TEST_CONFIG: default
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 2
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: Set DOCKER_HOST
run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}"
- name: Runner health check system info
if: always()
run: |
cat /etc/os-release || true
cat /etc/apt/sources.list.d/rocm.list || true
cat /opt/rocm/.info/version || true
whoami
- name: Runner health check rocm-smi
if: always()
run: |
rocm-smi
- name: Runner health check rocminfo
if: always()
run: |
rocminfo
- name: Runner health check GPU count
if: always()
run: |
ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx')
if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then
echo "Failed to detect GPUs on the runner"
exit 1
fi
- name: Runner health check disconnect on failure
if: ${{ failure() }}
run: |
killall runsvc.sh
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
# Time out the test phase after 240 minutes
timeout-minutes: 240
run: |
set -x

if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.jenkins/caffe2/test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PR_BODY \
-e PYTORCH_RETRY_TEST_CASES \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
# jenkins user does not have write permission to mounted workspace; work-around by copying within container to jenkins home
docker exec -t "${container_name}" sh -c "cd .. && cp -R workspace pytorch && cd pytorch && pip install dist/*.whl && ${TEST_COMMAND}"
# copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
docker exec -t "${container_name}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.rocm.gpu'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: actions/upload-artifact@v2
name: Store Test Downloaded JSONs on Github
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.rocm.gpu'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports on Github
if: always()
with:
name: test-reports
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-bionic-rocm4.5-py3.7-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
test_default_2_1:
name: test (default, 2, 2, linux.rocm.gpu)
needs: build
runs-on: linux.rocm.gpu
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-bionic-rocm4.5-py3.7-test
TEST_CONFIG: default
SHARD_NUMBER: 2
NUM_TEST_SHARDS: 2
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: Set DOCKER_HOST
run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}"
- name: Runner health check system info
if: always()
run: |
cat /etc/os-release || true
cat /etc/apt/sources.list.d/rocm.list || true
cat /opt/rocm/.info/version || true
whoami
- name: Runner health check rocm-smi
if: always()
run: |
rocm-smi
- name: Runner health check rocminfo
if: always()
run: |
rocminfo
- name: Runner health check GPU count
if: always()
run: |
ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx')
if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then
echo "Failed to detect GPUs on the runner"
exit 1
fi
- name: Runner health check disconnect on failure
if: ${{ failure() }}
run: |
killall runsvc.sh
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
# Time out the test phase after 240 minutes
timeout-minutes: 240
run: |
set -x

if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.jenkins/caffe2/test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PR_BODY \
-e PYTORCH_RETRY_TEST_CASES \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
# jenkins user does not have write permission to mounted workspace; work-around by copying within container to jenkins home
docker exec -t "${container_name}" sh -c "cd .. && cp -R workspace pytorch && cd pytorch && pip install dist/*.whl && ${TEST_COMMAND}"
# copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
docker exec -t "${container_name}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.rocm.gpu'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: actions/upload-artifact@v2
name: Store Test Downloaded JSONs on Github
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.rocm.gpu'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip

305
.github/workflows/generated-linux-vulkan-bionic-py3.7-clang9.yml
generated
vendored

@@ -250,53 +250,17 @@ jobs:
# Prune all of the docker images
docker system prune -af

generate-test-matrix:
test_smoke_tests_1_1:
name: test (smoke_tests, 1, 1, linux.2xlarge)
needs: build
runs-on: ubuntu-18.04
timeout-minutes: 240
env:
TEST_RUNNER_TYPE: linux.2xlarge
ENABLE_DISTRIBUTED_TEST: ''
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 1
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py

test:
needs: [build, generate-test-matrix]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-vulkan-bionic-py3.7-clang9-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: smoke_tests
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information

@@ -359,11 +323,6 @@ jobs:
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"

@@ -476,7 +435,7 @@ jobs:
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip

@@ -492,7 +451,255 @@ jobs:
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-vulkan-bionic-py3.7-clang9-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
test_default_1_1:
name: test (default, 1, 1, linux.2xlarge)
needs: build
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-vulkan-bionic-py3.7-clang9-test
TEST_CONFIG: default
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
# Time out the test phase after 240 minutes
timeout-minutes: 240
run: |
set -x

if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.jenkins/caffe2/test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
PROXY_ENV=
# NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
# We should investigate whether or not there's a list of hostnames we can add to no_proxy to
# make it so that we shouldn't have to fully disable squid for XLA tests
if [[ $TEST_CONFIG != 'xla' ]]; then
# shellcheck disable=SC2089
PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PR_BODY \
-e PYTORCH_RETRY_TEST_CASES \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
${PROXY_ENV} \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--ipc=host \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-1-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-1-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip

809
.github/workflows/generated-linux-xenial-cuda11.3-py3.7-gcc7.yml
generated
vendored
|
|
@ -249,53 +249,17 @@ jobs:
|
|||
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
|
||||
generate-test-matrix:
|
||||
test_distributed_1_1:
|
||||
name: test (distributed, 1, 1, linux.8xlarge.nvidia.gpu)
|
||||
needs: build
|
||||
runs-on: ubuntu-18.04
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
TEST_RUNNER_TYPE: linux.4xlarge.nvidia.gpu
|
||||
ENABLE_DISTRIBUTED_TEST: 1
|
||||
ENABLE_JIT_LEGACY_TEST: ''
|
||||
ENABLE_MULTIGPU_TEST: ''
|
||||
ENABLE_NOGPU_NO_AVX_TEST: ''
|
||||
ENABLE_NOGPU_NO_AVX2_TEST: ''
|
||||
ENABLE_SLOW_TEST: ''
|
||||
ENABLE_DOCS_TEST: ''
|
||||
ENABLE_BACKWARDS_COMPAT_TEST: ''
|
||||
ENABLE_XLA_TEST: ''
|
||||
ENABLE_NOARCH_TEST: ''
|
||||
NUM_TEST_SHARDS: 2
|
||||
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
|
||||
DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
|
||||
NOGPU_RUNNER_TYPE: linux.2xlarge
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
|
||||
container:
|
||||
image: python:3.9
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
run: pip install typing-extensions==3.10
|
||||
- name: Clone pytorch/pytorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
- name: Generating test matrix
|
||||
id: set-matrix
|
||||
run: .github/scripts/generate_pytorch_test_matrix.py
|
||||
|
||||
test:
|
||||
needs: [build, generate-test-matrix]
|
||||
strategy:
|
||||
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
|
||||
fail-fast: false
|
||||
runs-on: ${{ matrix.runner }}
|
||||
runs-on: linux.8xlarge.nvidia.gpu
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
|
||||
JOB_BASE_NAME: linux-xenial-cuda11.3-py3.7-gcc7-test
|
||||
TEST_CONFIG: ${{ matrix.config }}
|
||||
SHARD_NUMBER: ${{ matrix.shard }}
|
||||
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
|
||||
TEST_CONFIG: distributed
|
||||
SHARD_NUMBER: 1
|
||||
NUM_TEST_SHARDS: 1
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
|
|
@ -359,7 +323,6 @@ jobs:
|
|||
}
|
||||
retry docker pull "${DOCKER_IMAGE}"
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
|
||||
run: |
|
||||
bash .github/scripts/install_nvidia_utils_linux.sh
|
||||
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
|
||||
|
|
@ -475,7 +438,7 @@ jobs:
|
|||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
|
||||
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.8xlarge.nvidia.gpu'
|
||||
run: |
|
||||
# Remove any previous test jsons if they exist
|
||||
rm -f test-jsons-*.zip
|
||||
|
|
@@ -491,7 +454,763 @@ jobs:
       - name: Zip test reports for upload
         if: always()
         env:
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+          FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.8xlarge.nvidia.gpu'
         run: |
           # Remove any previous test reports if they exist
           rm -f test-reports-*.zip
+          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: linux-xenial-cuda11.3-py3.7-gcc7-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  test_smoke_tests_1_1:
+    name: test (smoke_tests, 1, 1, linux.4xlarge.nvidia.gpu)
+    needs: build
+    runs-on: linux.4xlarge.nvidia.gpu
+    timeout-minutes: 240
+    env:
+      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      JOB_BASE_NAME: linux-xenial-cuda11.3-py3.7-gcc7-test
+      TEST_CONFIG: smoke_tests
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 1
+      PR_BODY: ${{ github.event.pull_request.body }}
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Pull Docker image
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
+        run: |
+          bash .github/scripts/install_nvidia_utils_linux.sh
+          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
+      - name: Determine shm-size
+        run: |
+          shm_size="1g"
+          case "${BUILD_ENVIRONMENT}" in
+            *cuda*)
+              shm_size="2g"
+              ;;
+            *rocm*)
+              shm_size="8g"
+              ;;
+          esac
+          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+      - name: Unzip artifacts
+        run: |
+          unzip -o artifacts.zip
+      - name: Output disk space left
+        run: |
+          sudo df -H
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Test
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+        # Time out the test phase after 240 minutes
+        timeout-minutes: 240
+        run: |
+          set -x
+
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
+          else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          PROXY_ENV=
+          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
+          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
+          # make it so that we shouldn't have to fully disable squid for XLA tests
+          if [[ $TEST_CONFIG != 'xla' ]]; then
+            # shellcheck disable=SC2089
+            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
+          fi
+          # detached container should get cleaned up by teardown_ec2_linux
+          # TODO: Stop building test binaries as part of the build phase
+          # Used for GPU_FLAG since that doesn't play nice
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BUILD_ENVIRONMENT \
+            -e PR_NUMBER \
+            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
+            -e GITHUB_ACTIONS \
+            -e IN_CI \
+            -e IS_GHA \
+            -e BRANCH \
+            -e SHA1 \
+            -e AWS_DEFAULT_REGION \
+            -e IN_WHEEL_TEST \
+            -e SHARD_NUMBER \
+            -e JOB_BASE_NAME \
+            -e TEST_CONFIG \
+            -e NUM_TEST_SHARDS \
+            -e PR_BODY \
+            -e PYTORCH_RETRY_TEST_CASES \
+            -e PR_LABELS \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            ${PROXY_ENV} \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --ipc=host \
+            --shm-size="${SHM_SIZE}" \
+            --tty \
+            --detach \
+            --name="${container_name}" \
+            --user jenkins \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.4xlarge.nvidia.gpu'
+        run: |
+          # Remove any previous test jsons if they exist
+          rm -f test-jsons-*.zip
+          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.4xlarge.nvidia.gpu'
+        run: |
+          # Remove any previous test reports if they exist
+          rm -f test-reports-*.zip
+          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: linux-xenial-cuda11.3-py3.7-gcc7-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  test_default_1_1:
+    name: test (default, 1, 2, linux.4xlarge.nvidia.gpu)
+    needs: build
+    runs-on: linux.4xlarge.nvidia.gpu
+    timeout-minutes: 240
+    env:
+      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      JOB_BASE_NAME: linux-xenial-cuda11.3-py3.7-gcc7-test
+      TEST_CONFIG: default
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 2
+      PR_BODY: ${{ github.event.pull_request.body }}
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Pull Docker image
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
+        run: |
+          bash .github/scripts/install_nvidia_utils_linux.sh
+          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
+      - name: Determine shm-size
+        run: |
+          shm_size="1g"
+          case "${BUILD_ENVIRONMENT}" in
+            *cuda*)
+              shm_size="2g"
+              ;;
+            *rocm*)
+              shm_size="8g"
+              ;;
+          esac
+          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+      - name: Unzip artifacts
+        run: |
+          unzip -o artifacts.zip
+      - name: Output disk space left
+        run: |
+          sudo df -H
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Test
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+        # Time out the test phase after 240 minutes
+        timeout-minutes: 240
+        run: |
+          set -x
+
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
+          else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          PROXY_ENV=
+          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
+          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
+          # make it so that we shouldn't have to fully disable squid for XLA tests
+          if [[ $TEST_CONFIG != 'xla' ]]; then
+            # shellcheck disable=SC2089
+            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
+          fi
+          # detached container should get cleaned up by teardown_ec2_linux
+          # TODO: Stop building test binaries as part of the build phase
+          # Used for GPU_FLAG since that doesn't play nice
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BUILD_ENVIRONMENT \
+            -e PR_NUMBER \
+            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
+            -e GITHUB_ACTIONS \
+            -e IN_CI \
+            -e IS_GHA \
+            -e BRANCH \
+            -e SHA1 \
+            -e AWS_DEFAULT_REGION \
+            -e IN_WHEEL_TEST \
+            -e SHARD_NUMBER \
+            -e JOB_BASE_NAME \
+            -e TEST_CONFIG \
+            -e NUM_TEST_SHARDS \
+            -e PR_BODY \
+            -e PYTORCH_RETRY_TEST_CASES \
+            -e PR_LABELS \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            ${PROXY_ENV} \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --ipc=host \
+            --shm-size="${SHM_SIZE}" \
+            --tty \
+            --detach \
+            --name="${container_name}" \
+            --user jenkins \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.4xlarge.nvidia.gpu'
+        run: |
+          # Remove any previous test jsons if they exist
+          rm -f test-jsons-*.zip
+          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.4xlarge.nvidia.gpu'
+        run: |
+          # Remove any previous test reports if they exist
+          rm -f test-reports-*.zip
+          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: linux-xenial-cuda11.3-py3.7-gcc7-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  test_default_2_1:
+    name: test (default, 2, 2, linux.4xlarge.nvidia.gpu)
+    needs: build
+    runs-on: linux.4xlarge.nvidia.gpu
+    timeout-minutes: 240
+    env:
+      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      JOB_BASE_NAME: linux-xenial-cuda11.3-py3.7-gcc7-test
+      TEST_CONFIG: default
+      SHARD_NUMBER: 2
+      NUM_TEST_SHARDS: 2
+      PR_BODY: ${{ github.event.pull_request.body }}
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Pull Docker image
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
+        run: |
+          bash .github/scripts/install_nvidia_utils_linux.sh
+          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
+      - name: Determine shm-size
+        run: |
+          shm_size="1g"
+          case "${BUILD_ENVIRONMENT}" in
+            *cuda*)
+              shm_size="2g"
+              ;;
+            *rocm*)
+              shm_size="8g"
+              ;;
+          esac
+          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+      - name: Unzip artifacts
+        run: |
+          unzip -o artifacts.zip
+      - name: Output disk space left
+        run: |
+          sudo df -H
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Test
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+        # Time out the test phase after 240 minutes
+        timeout-minutes: 240
+        run: |
+          set -x
+
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
+          else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          PROXY_ENV=
+          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
+          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
+          # make it so that we shouldn't have to fully disable squid for XLA tests
+          if [[ $TEST_CONFIG != 'xla' ]]; then
+            # shellcheck disable=SC2089
+            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
+          fi
+          # detached container should get cleaned up by teardown_ec2_linux
+          # TODO: Stop building test binaries as part of the build phase
+          # Used for GPU_FLAG since that doesn't play nice
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BUILD_ENVIRONMENT \
+            -e PR_NUMBER \
+            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
+            -e GITHUB_ACTIONS \
+            -e IN_CI \
+            -e IS_GHA \
+            -e BRANCH \
+            -e SHA1 \
+            -e AWS_DEFAULT_REGION \
+            -e IN_WHEEL_TEST \
+            -e SHARD_NUMBER \
+            -e JOB_BASE_NAME \
+            -e TEST_CONFIG \
+            -e NUM_TEST_SHARDS \
+            -e PR_BODY \
+            -e PYTORCH_RETRY_TEST_CASES \
+            -e PR_LABELS \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            ${PROXY_ENV} \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --ipc=host \
+            --shm-size="${SHM_SIZE}" \
+            --tty \
+            --detach \
+            --name="${container_name}" \
+            --user jenkins \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.4xlarge.nvidia.gpu'
+        run: |
+          # Remove any previous test jsons if they exist
+          rm -f test-jsons-*.zip
+          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.4xlarge.nvidia.gpu'
+        run: |
+          # Remove any previous test reports if they exist
+          rm -f test-reports-*.zip
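Both generated files inline the same three-attempt retry helper around every docker and ECR call. For reference, a Python equivalent of that shell one-liner (a sketch, not part of this PR):

# Python equivalent of the shell helper inlined in every job above:
#   retry () { "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") }
import subprocess
import time

def retry(*cmd: str) -> None:
    """Run cmd up to three times, sleeping 1s then 2s between attempts."""
    for delay in (1, 2):
        try:
            subprocess.run(cmd, check=True)
            return
        except subprocess.CalledProcessError:
            time.sleep(delay)
    subprocess.run(cmd, check=True)  # last attempt: let the error propagate

if __name__ == "__main__":
    retry("docker", "pull", "alpine:3.15")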

.github/workflows/generated-linux-xenial-py3.7-clang7-asan.yml (generated, vendored): 801 changed lines

@@ -250,53 +250,17 @@ jobs:
           # Prune all of the docker images
           docker system prune -af
 
-  generate-test-matrix:
-    runs-on: ubuntu-18.04
-    timeout-minutes: 240
-    env:
-      TEST_RUNNER_TYPE: linux.2xlarge
-      ENABLE_DISTRIBUTED_TEST: ''
-      ENABLE_JIT_LEGACY_TEST: ''
-      ENABLE_MULTIGPU_TEST: ''
-      ENABLE_NOGPU_NO_AVX_TEST: ''
-      ENABLE_NOGPU_NO_AVX2_TEST: ''
-      ENABLE_SLOW_TEST: ''
-      ENABLE_DOCS_TEST: ''
-      ENABLE_BACKWARDS_COMPAT_TEST: ''
-      ENABLE_XLA_TEST: ''
-      ENABLE_NOARCH_TEST: ''
-      NUM_TEST_SHARDS: 3
-      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
-      DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
-      NOGPU_RUNNER_TYPE: linux.2xlarge
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-    container:
-      image: python:3.9
-    steps:
-      - name: Install dependencies
-        run: pip install typing-extensions==3.10
-      - name: Clone pytorch/pytorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-      - name: Generating test matrix
-        id: set-matrix
-        run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
+  test_smoke_tests_1_1:
+    name: test (smoke_tests, 1, 1, linux.2xlarge)
+    needs: build
+    runs-on: linux.2xlarge
     timeout-minutes: 240
     env:
       DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
       JOB_BASE_NAME: linux-xenial-py3.7-clang7-asan-test
-      TEST_CONFIG: ${{ matrix.config }}
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
+      TEST_CONFIG: smoke_tests
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 1
       PR_BODY: ${{ github.event.pull_request.body }}
     steps:
       - name: Display EC2 information
@@ -359,11 +323,6 @@ jobs:
             "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
           }
           retry docker pull "${DOCKER_IMAGE}"
-      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
-        run: |
-          bash .github/scripts/install_nvidia_utils_linux.sh
-          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
       - name: Determine shm-size
         run: |
           shm_size="1g"
@@ -476,7 +435,7 @@ jobs:
       - name: Zip JSONs for upload
         if: always()
         env:
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
         run: |
          # Remove any previous test jsons if they exist
           rm -f test-jsons-*.zip
@@ -492,7 +451,751 @@ jobs:
       - name: Zip test reports for upload
         if: always()
         env:
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
         run: |
           # Remove any previous test reports if they exist
           rm -f test-reports-*.zip
+          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: linux-xenial-py3.7-clang7-asan-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  test_default_1_1:
+    name: test (default, 1, 3, linux.2xlarge)
+    needs: build
+    runs-on: linux.2xlarge
+    timeout-minutes: 240
+    env:
+      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      JOB_BASE_NAME: linux-xenial-py3.7-clang7-asan-test
+      TEST_CONFIG: default
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 3
+      PR_BODY: ${{ github.event.pull_request.body }}
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Pull Docker image
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Determine shm-size
+        run: |
+          shm_size="1g"
+          case "${BUILD_ENVIRONMENT}" in
+            *cuda*)
+              shm_size="2g"
+              ;;
+            *rocm*)
+              shm_size="8g"
+              ;;
+          esac
+          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+      - name: Unzip artifacts
+        run: |
+          unzip -o artifacts.zip
+      - name: Output disk space left
+        run: |
+          sudo df -H
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Test
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+        # Time out the test phase after 240 minutes
+        timeout-minutes: 240
+        run: |
+          set -x
+
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
+          else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          PROXY_ENV=
+          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
+          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
+          # make it so that we shouldn't have to fully disable squid for XLA tests
+          if [[ $TEST_CONFIG != 'xla' ]]; then
+            # shellcheck disable=SC2089
+            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
+          fi
+          # detached container should get cleaned up by teardown_ec2_linux
+          # TODO: Stop building test binaries as part of the build phase
+          # Used for GPU_FLAG since that doesn't play nice
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BUILD_ENVIRONMENT \
+            -e PR_NUMBER \
+            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
+            -e GITHUB_ACTIONS \
+            -e IN_CI \
+            -e IS_GHA \
+            -e BRANCH \
+            -e SHA1 \
+            -e AWS_DEFAULT_REGION \
+            -e IN_WHEEL_TEST \
+            -e SHARD_NUMBER \
+            -e JOB_BASE_NAME \
+            -e TEST_CONFIG \
+            -e NUM_TEST_SHARDS \
+            -e PR_BODY \
+            -e PYTORCH_RETRY_TEST_CASES \
+            -e PR_LABELS \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            ${PROXY_ENV} \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --ipc=host \
+            --shm-size="${SHM_SIZE}" \
+            --tty \
+            --detach \
+            --name="${container_name}" \
+            --user jenkins \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-1-3-linux.2xlarge'
+        run: |
+          # Remove any previous test jsons if they exist
+          rm -f test-jsons-*.zip
+          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-1-3-linux.2xlarge'
+        run: |
+          # Remove any previous test reports if they exist
+          rm -f test-reports-*.zip
+          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: linux-xenial-py3.7-clang7-asan-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  test_default_2_1:
+    name: test (default, 2, 3, linux.2xlarge)
+    needs: build
+    runs-on: linux.2xlarge
+    timeout-minutes: 240
+    env:
+      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      JOB_BASE_NAME: linux-xenial-py3.7-clang7-asan-test
+      TEST_CONFIG: default
+      SHARD_NUMBER: 2
+      NUM_TEST_SHARDS: 3
+      PR_BODY: ${{ github.event.pull_request.body }}
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Pull Docker image
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Determine shm-size
+        run: |
+          shm_size="1g"
+          case "${BUILD_ENVIRONMENT}" in
+            *cuda*)
+              shm_size="2g"
+              ;;
+            *rocm*)
+              shm_size="8g"
+              ;;
+          esac
+          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+      - name: Unzip artifacts
+        run: |
+          unzip -o artifacts.zip
+      - name: Output disk space left
+        run: |
+          sudo df -H
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Test
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+        # Time out the test phase after 240 minutes
+        timeout-minutes: 240
+        run: |
+          set -x
+
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
+          else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          PROXY_ENV=
+          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
+          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
+          # make it so that we shouldn't have to fully disable squid for XLA tests
+          if [[ $TEST_CONFIG != 'xla' ]]; then
+            # shellcheck disable=SC2089
+            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
+          fi
+          # detached container should get cleaned up by teardown_ec2_linux
+          # TODO: Stop building test binaries as part of the build phase
+          # Used for GPU_FLAG since that doesn't play nice
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BUILD_ENVIRONMENT \
+            -e PR_NUMBER \
+            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
+            -e GITHUB_ACTIONS \
+            -e IN_CI \
+            -e IS_GHA \
+            -e BRANCH \
+            -e SHA1 \
+            -e AWS_DEFAULT_REGION \
+            -e IN_WHEEL_TEST \
+            -e SHARD_NUMBER \
+            -e JOB_BASE_NAME \
+            -e TEST_CONFIG \
+            -e NUM_TEST_SHARDS \
+            -e PR_BODY \
+            -e PYTORCH_RETRY_TEST_CASES \
+            -e PR_LABELS \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            ${PROXY_ENV} \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --ipc=host \
+            --shm-size="${SHM_SIZE}" \
+            --tty \
+            --detach \
+            --name="${container_name}" \
+            --user jenkins \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-2-3-linux.2xlarge'
+        run: |
+          # Remove any previous test jsons if they exist
+          rm -f test-jsons-*.zip
+          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-2-3-linux.2xlarge'
+        run: |
+          # Remove any previous test reports if they exist
+          rm -f test-reports-*.zip
+          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: linux-xenial-py3.7-clang7-asan-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+  test_default_3_1:
+    name: test (default, 3, 3, linux.2xlarge)
+    needs: build
+    runs-on: linux.2xlarge
+    timeout-minutes: 240
+    env:
+      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      JOB_BASE_NAME: linux-xenial-py3.7-clang7-asan-test
+      TEST_CONFIG: default
+      SHARD_NUMBER: 3
+      NUM_TEST_SHARDS: 3
+      PR_BODY: ${{ github.event.pull_request.body }}
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
+              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      - name: Chown workspace
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE}"
+          mkdir "${GITHUB_WORKSPACE}"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Pull Docker image
+        run: |
+          retry () {
+            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Determine shm-size
+        run: |
+          shm_size="1g"
+          case "${BUILD_ENVIRONMENT}" in
+            *cuda*)
+              shm_size="2g"
+              ;;
+            *rocm*)
+              shm_size="8g"
+              ;;
+          esac
+          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+      - name: Unzip artifacts
+        run: |
+          unzip -o artifacts.zip
+      - name: Output disk space left
+        run: |
+          sudo df -H
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Test
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+        # Time out the test phase after 240 minutes
+        timeout-minutes: 240
+        run: |
+          set -x
+
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
+          else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          PROXY_ENV=
+          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
+          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
+          # make it so that we shouldn't have to fully disable squid for XLA tests
+          if [[ $TEST_CONFIG != 'xla' ]]; then
+            # shellcheck disable=SC2089
+            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
+          fi
+          # detached container should get cleaned up by teardown_ec2_linux
+          # TODO: Stop building test binaries as part of the build phase
+          # Used for GPU_FLAG since that doesn't play nice
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BUILD_ENVIRONMENT \
+            -e PR_NUMBER \
+            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
+            -e GITHUB_ACTIONS \
+            -e IN_CI \
+            -e IS_GHA \
+            -e BRANCH \
+            -e SHA1 \
+            -e AWS_DEFAULT_REGION \
+            -e IN_WHEEL_TEST \
+            -e SHARD_NUMBER \
+            -e JOB_BASE_NAME \
+            -e TEST_CONFIG \
+            -e NUM_TEST_SHARDS \
+            -e PR_BODY \
+            -e PYTORCH_RETRY_TEST_CASES \
+            -e PR_LABELS \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            ${PROXY_ENV} \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --ipc=host \
+            --shm-size="${SHM_SIZE}" \
+            --tty \
+            --detach \
+            --name="${container_name}" \
+            --user jenkins \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-3-3-linux.2xlarge'
+        run: |
+          # Remove any previous test jsons if they exist
+          rm -f test-jsons-*.zip
+          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-default-3-3-linux.2xlarge'
|
||||
run: |
|
||||
# Remove any previous test reports if they exist
|
||||
rm -f test-reports-*.zip
|
||||
|
|
|
|||
553 .github/workflows/generated-linux-xenial-py3.7-clang7-onnx.yml generated vendored
@@ -250,53 +250,17 @@ jobs:
# Prune all of the docker images
docker system prune -af

generate-test-matrix:
test_smoke_tests_1_1:
name: test (smoke_tests, 1, 1, linux.2xlarge)
needs: build
runs-on: ubuntu-18.04
timeout-minutes: 240
env:
TEST_RUNNER_TYPE: linux.2xlarge
ENABLE_DISTRIBUTED_TEST: ''
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py

test:
needs: [build, generate-test-matrix]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-py3.7-clang7-onnx-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: smoke_tests
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information
@@ -359,11 +323,6 @@ jobs:
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
@@ -476,7 +435,7 @@ jobs:
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
@@ -492,7 +451,503 @@ jobs:
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-py3.7-clang7-onnx-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
test_default_1_1:
name: test (default, 1, 2, linux.2xlarge)
needs: build
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-py3.7-clang7-onnx-test
TEST_CONFIG: default
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 2
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
# Time out the test phase after 240 minutes
timeout-minutes: 240
run: |
set -x

if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.jenkins/caffe2/test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
PROXY_ENV=
# NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
# We should investigate whether or not there's a list of hostnames we can add to no_proxy to
# make it so that we shouldn't have to fully disable squid for XLA tests
if [[ $TEST_CONFIG != 'xla' ]]; then
# shellcheck disable=SC2089
PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PR_BODY \
-e PYTORCH_RETRY_TEST_CASES \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
${PROXY_ENV} \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--ipc=host \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-py3.7-clang7-onnx-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
test_default_2_1:
name: test (default, 2, 2, linux.2xlarge)
needs: build
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-py3.7-clang7-onnx-test
TEST_CONFIG: default
SHARD_NUMBER: 2
NUM_TEST_SHARDS: 2
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
# Time out the test phase after 240 minutes
timeout-minutes: 240
run: |
set -x

if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.jenkins/caffe2/test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
PROXY_ENV=
# NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
# We should investigate whether or not there's a list of hostnames we can add to no_proxy to
# make it so that we shouldn't have to fully disable squid for XLA tests
if [[ $TEST_CONFIG != 'xla' ]]; then
# shellcheck disable=SC2089
PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PR_BODY \
-e PYTORCH_RETRY_TEST_CASES \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
${PROXY_ENV} \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--ipc=host \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
1545 .github/workflows/generated-linux-xenial-py3.7-gcc5.4.yml generated vendored
File diff suppressed because it is too large
801 .github/workflows/generated-linux-xenial-py3.7-gcc7.yml generated vendored
@@ -249,53 +249,17 @@ jobs:
# Prune all of the docker images
docker system prune -af

generate-test-matrix:
test_distributed_1_1:
name: test (distributed, 1, 1, linux.2xlarge)
needs: build
runs-on: ubuntu-18.04
timeout-minutes: 240
env:
TEST_RUNNER_TYPE: linux.2xlarge
ENABLE_DISTRIBUTED_TEST: 1
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py

test:
needs: [build, generate-test-matrix]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-py3.7-gcc7-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: distributed
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information
@@ -358,11 +322,6 @@ jobs:
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
@@ -475,7 +434,7 @@ jobs:
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
@@ -491,7 +450,751 @@ jobs:
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-py3.7-gcc7-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
test_smoke_tests_1_1:
name: test (smoke_tests, 1, 1, linux.2xlarge)
needs: build
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-py3.7-gcc7-test
TEST_CONFIG: smoke_tests
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
# Time out the test phase after 240 minutes
timeout-minutes: 240
run: |
set -x

if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.jenkins/caffe2/test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
PROXY_ENV=
# NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
# We should investigate whether or not there's a list of hostnames we can add to no_proxy to
# make it so that we shouldn't have to fully disable squid for XLA tests
if [[ $TEST_CONFIG != 'xla' ]]; then
# shellcheck disable=SC2089
PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PR_BODY \
-e PYTORCH_RETRY_TEST_CASES \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
${PROXY_ENV} \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--ipc=host \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-py3.7-gcc7-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
test_default_1_1:
name: test (default, 1, 2, linux.2xlarge)
needs: build
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-py3.7-gcc7-test
TEST_CONFIG: default
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 2
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
# Time out the test phase after 240 minutes
timeout-minutes: 240
run: |
set -x

if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.jenkins/caffe2/test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
PROXY_ENV=
# NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
# We should investigate whether or not there's a list of hostnames we can add to no_proxy to
# make it so that we shouldn't have to fully disable squid for XLA tests
if [[ $TEST_CONFIG != 'xla' ]]; then
# shellcheck disable=SC2089
PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PR_BODY \
-e PYTORCH_RETRY_TEST_CASES \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
${PROXY_ENV} \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--ipc=host \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-py3.7-gcc7-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
test_default_2_1:
name: test (default, 2, 2, linux.2xlarge)
needs: build
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-py3.7-gcc7-test
TEST_CONFIG: default
SHARD_NUMBER: 2
NUM_TEST_SHARDS: 2
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
# Time out the test phase after 240 minutes
timeout-minutes: 240
run: |
set -x

if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.jenkins/caffe2/test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
PROXY_ENV=
# NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
# We should investigate whether or not there's a list of hostnames we can add to no_proxy to
# make it so that we shouldn't have to fully disable squid for XLA tests
if [[ $TEST_CONFIG != 'xla' ]]; then
# shellcheck disable=SC2089
PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PR_BODY \
-e PYTORCH_RETRY_TEST_CASES \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
${PROXY_ENV} \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--ipc=host \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip

268  .github/workflows/generated-macos-11-py3-x86-64.yml  generated  vendored

@ -85,40 +85,16 @@ jobs:
          artifacts.zip

  generate-test-matrix:
  test_smoke_tests_1_1:
    name: test (smoke_tests, 1, 1, macos-11)
    needs: build
    runs-on: ubuntu-18.04
    timeout-minutes: 240
    env:
      TEST_RUNNER_TYPE: macos-11
      ENABLE_DISTRIBUTED_TEST: ''
      NUM_TEST_SHARDS: 2
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
    container:
      image: python:3.9
    steps:
    - name: Install dependencies
      run: pip install typing-extensions==3.10
    - name: Clone pytorch/pytorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
    - name: Generating test matrix
      id: set-matrix
      run: .github/scripts/generate_pytorch_test_matrix.py

  test:
    needs: [build, generate-test-matrix]
    strategy:
      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    runs-on: macos-11
    timeout-minutes: 240
    env:
      JOB_BASE_NAME: macos-11-py3-x86-64-test
      TEST_CONFIG: ${{ matrix.config }}
      SHARD_NUMBER: ${{ matrix.shard }}
      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
      TEST_CONFIG: smoke_tests
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 1
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
    - name: Checkout PyTorch

@ -173,7 +149,7 @@ jobs:

    - name: Zip JSONs for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
        FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-macos-11'
      run: |
        # Remove any previous test jsons if they exist
        rm -f test-jsons-*.zip

@ -190,7 +166,235 @@ jobs:

    - name: Zip test reports for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
        FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-macos-11'
      run: |
        # Remove any previous test reports if they exist
        rm -f test-reports-*.zip
        zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
    - uses: actions/upload-artifact@v2
      name: Store Test Reports on Github
      if: always()
      with:
        name: test-reports
        retention-days: 14
        if-no-files-found: error
        path:
          test-reports-*.zip
    - name: Display and upload test statistics (Click Me)
      if: always()
      # temporary hack: set CIRCLE_* vars, until we update
      # tools/stats/print_test_stats.py to natively support GitHub Actions
      env:
        AWS_DEFAULT_REGION: us-east-1
        BRANCH: ${{ steps.parse-ref.outputs.branch }}
        JOB_BASE_NAME: macos-11-py3-x86-64-test
        PR_NUMBER: ${{ github.event.pull_request.number }}
        SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
        TAG: ${{ steps.parse-ref.outputs.tag }}
        WORKFLOW_ID: '${{ github.run_id }}'
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_SECRET_ACCESS_KEY }}
      shell: bash
      run: |
        python3 -m pip install -r requirements.txt
        python3 -m pip install boto3==1.19.12
        python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
  test_default_1_1:
    name: test (default, 1, 2, macos-11)
    needs: build
    runs-on: macos-11
    timeout-minutes: 240
    env:
      JOB_BASE_NAME: macos-11-py3-x86-64-test
      TEST_CONFIG: default
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 2
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # deep clone, to allow use of git merge-base
        fetch-depth: 0
        submodules: false
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - uses: actions/download-artifact@v2
      name: Download PyTorch Build Artifacts from GHA
      with:
        name: ${{ env.BUILD_ENVIRONMENT }}
        path: .
    - name: Unzip artifacts
      run: |
        unzip -o artifacts.zip
    - name: Setup miniconda
      uses: conda-incubator/setup-miniconda@v2
      with:
        auto-update-conda: true
        python-version: 3.8
        activate-environment: build
    - name: Install macOS homebrew dependencies
      run: |
        # Install dependencies
        brew install libomp
    - name: Parse ref
      id: parse-ref
      run: .github/scripts/parse_ref.py
    - name: Test
      run: |
        python3 -mpip install dist/*.whl
        .jenkins/pytorch/macos-test.sh
    - name: Install render_test_results dependencies
      if: always()
      shell: bash
      run: |
        python3 -m pip install junitparser==2.1.1 rich==10.9.0
    - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
      if: always()
      shell: bash
      # Encoding is weird on windows, just try to default to utf-8 if possible
      env:
        PYTHONIOENCODING: "utf-8"
      run: |
        python3 tools/render_junit.py test/
    - name: Zip JSONs for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-1-2-macos-11'
      run: |
        # Remove any previous test jsons if they exist
        rm -f test-jsons-*.zip
        zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
    - uses: actions/upload-artifact@v2
      name: Store Test Downloaded JSONs on Github
      if: always()
      with:
        name: test-jsons
        retention-days: 14
        if-no-files-found: warn
        path:
          test-jsons-*.zip
    - name: Zip test reports for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-1-2-macos-11'
      run: |
        # Remove any previous test reports if they exist
        rm -f test-reports-*.zip
        zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
    - uses: actions/upload-artifact@v2
      name: Store Test Reports on Github
      if: always()
      with:
        name: test-reports
        retention-days: 14
        if-no-files-found: error
        path:
          test-reports-*.zip
    - name: Display and upload test statistics (Click Me)
      if: always()
      # temporary hack: set CIRCLE_* vars, until we update
      # tools/stats/print_test_stats.py to natively support GitHub Actions
      env:
        AWS_DEFAULT_REGION: us-east-1
        BRANCH: ${{ steps.parse-ref.outputs.branch }}
        JOB_BASE_NAME: macos-11-py3-x86-64-test
        PR_NUMBER: ${{ github.event.pull_request.number }}
        SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
        TAG: ${{ steps.parse-ref.outputs.tag }}
        WORKFLOW_ID: '${{ github.run_id }}'
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_SECRET_ACCESS_KEY }}
      shell: bash
      run: |
        python3 -m pip install -r requirements.txt
        python3 -m pip install boto3==1.19.12
        python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
  test_default_2_1:
    name: test (default, 2, 2, macos-11)
    needs: build
    runs-on: macos-11
    timeout-minutes: 240
    env:
      JOB_BASE_NAME: macos-11-py3-x86-64-test
      TEST_CONFIG: default
      SHARD_NUMBER: 2
      NUM_TEST_SHARDS: 2
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # deep clone, to allow use of git merge-base
        fetch-depth: 0
        submodules: false
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - uses: actions/download-artifact@v2
      name: Download PyTorch Build Artifacts from GHA
      with:
        name: ${{ env.BUILD_ENVIRONMENT }}
        path: .
    - name: Unzip artifacts
      run: |
        unzip -o artifacts.zip
    - name: Setup miniconda
      uses: conda-incubator/setup-miniconda@v2
      with:
        auto-update-conda: true
        python-version: 3.8
        activate-environment: build
    - name: Install macOS homebrew dependencies
      run: |
        # Install dependencies
        brew install libomp
    - name: Parse ref
      id: parse-ref
      run: .github/scripts/parse_ref.py
    - name: Test
      run: |
        python3 -mpip install dist/*.whl
        .jenkins/pytorch/macos-test.sh
    - name: Install render_test_results dependencies
      if: always()
      shell: bash
      run: |
        python3 -m pip install junitparser==2.1.1 rich==10.9.0
    - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
      if: always()
      shell: bash
      # Encoding is weird on windows, just try to default to utf-8 if possible
      env:
        PYTHONIOENCODING: "utf-8"
      run: |
        python3 tools/render_junit.py test/
    - name: Zip JSONs for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-2-2-macos-11'
      run: |
        # Remove any previous test jsons if they exist
        rm -f test-jsons-*.zip
        zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
    - uses: actions/upload-artifact@v2
      name: Store Test Downloaded JSONs on Github
      if: always()
      with:
        name: test-jsons
        retention-days: 14
        if-no-files-found: warn
        path:
          test-jsons-*.zip
    - name: Zip test reports for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-2-2-macos-11'
      run: |
        # Remove any previous test reports if they exist
        rm -f test-reports-*.zip

553  .github/workflows/generated-parallelnative-linux-xenial-py3.7-gcc5.4.yml  generated  vendored

@ -248,53 +248,17 @@ jobs:
        # Prune all of the docker images
        docker system prune -af

  generate-test-matrix:
  test_distributed_1_1:
    name: test (distributed, 1, 1, linux.2xlarge)
    needs: build
    runs-on: ubuntu-18.04
    timeout-minutes: 240
    env:
      TEST_RUNNER_TYPE: linux.2xlarge
      ENABLE_DISTRIBUTED_TEST: 1
      ENABLE_JIT_LEGACY_TEST: ''
      ENABLE_MULTIGPU_TEST: ''
      ENABLE_NOGPU_NO_AVX_TEST: ''
      ENABLE_NOGPU_NO_AVX2_TEST: ''
      ENABLE_SLOW_TEST: ''
      ENABLE_DOCS_TEST: ''
      ENABLE_BACKWARDS_COMPAT_TEST: ''
      ENABLE_XLA_TEST: ''
      ENABLE_NOARCH_TEST: ''
      NUM_TEST_SHARDS: 1
      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
      DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
      NOGPU_RUNNER_TYPE: linux.2xlarge
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
    container:
      image: python:3.9
    steps:
    - name: Install dependencies
      run: pip install typing-extensions==3.10
    - name: Clone pytorch/pytorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
    - name: Generating test matrix
      id: set-matrix
      run: .github/scripts/generate_pytorch_test_matrix.py

  test:
    needs: [build, generate-test-matrix]
    strategy:
      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    runs-on: linux.2xlarge
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: parallelnative-linux-xenial-py3.7-gcc5.4-test
      TEST_CONFIG: ${{ matrix.config }}
      SHARD_NUMBER: ${{ matrix.shard }}
      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
      TEST_CONFIG: distributed
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 1
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
    - name: Display EC2 information

@ -357,11 +321,6 @@ jobs:

          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Determine shm-size
      run: |
        shm_size="1g"

@ -474,7 +433,7 @@ jobs:

    - name: Zip JSONs for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
        FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.2xlarge'
      run: |
        # Remove any previous test jsons if they exist
        rm -f test-jsons-*.zip

@ -490,7 +449,503 @@ jobs:

    - name: Zip test reports for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
        FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.2xlarge'
      run: |
        # Remove any previous test reports if they exist
        rm -f test-reports-*.zip
        zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Reports on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: error
        path:
          test-reports-*.zip
    - name: Display and upload test statistics (Click Me)
      if: always()
      # temporary hack: set CIRCLE_* vars, until we update
      # tools/stats/print_test_stats.py to natively support GitHub Actions
      env:
        AWS_DEFAULT_REGION: us-east-1
        BRANCH: ${{ steps.parse-ref.outputs.branch }}
        JOB_BASE_NAME: parallelnative-linux-xenial-py3.7-gcc5.4-test
        PR_NUMBER: ${{ github.event.pull_request.number }}
        SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
        TAG: ${{ steps.parse-ref.outputs.tag }}
        WORKFLOW_ID: '${{ github.run_id }}'
      shell: bash
      run: |
        python3 -m pip install -r requirements.txt
        python3 -m pip install boto3==1.19.12
        python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
  test_smoke_tests_1_1:
    name: test (smoke_tests, 1, 1, linux.2xlarge)
    needs: build
    runs-on: linux.2xlarge
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: parallelnative-linux-xenial-py3.7-gcc5.4-test
      TEST_CONFIG: smoke_tests
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 1
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # deep clone, to allow use of git merge-base
        fetch-depth: 0
        submodules: recursive
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Determine shm-size
      run: |
        shm_size="1g"
        case "${BUILD_ENVIRONMENT}" in
          *cuda*)
            shm_size="2g"
            ;;
          *rocm*)
            shm_size="8g"
            ;;
        esac
        echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download PyTorch Build Artifacts
      with:
        name: ${{ env.BUILD_ENVIRONMENT }}
    - name: Unzip artifacts
      run: |
        unzip -o artifacts.zip
    - name: Output disk space left
      run: |
        sudo df -H
    - name: Parse ref
      id: parse-ref
      run: .github/scripts/parse_ref.py
    - name: Test
      env:
        PR_NUMBER: ${{ github.event.pull_request.number }}
        BRANCH: ${{ steps.parse-ref.outputs.branch }}
      # Time out the test phase after 240 minutes
      timeout-minutes: 240
      run: |
        set -x

        if [[ $TEST_CONFIG == 'multigpu' ]]; then
          TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
        elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
          TEST_COMMAND=.jenkins/caffe2/test.sh
        else
          TEST_COMMAND=.jenkins/pytorch/test.sh
        fi
        PROXY_ENV=
        # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
        # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
        # make it so that we shouldn't have to fully disable squid for XLA tests
        if [[ $TEST_CONFIG != 'xla' ]]; then
          # shellcheck disable=SC2089
          PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
        fi
        # detached container should get cleaned up by teardown_ec2_linux
        # TODO: Stop building test binaries as part of the build phase
        # Used for GPU_FLAG since that doesn't play nice
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BUILD_ENVIRONMENT \
          -e PR_NUMBER \
          -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
          -e GITHUB_ACTIONS \
          -e IN_CI \
          -e IS_GHA \
          -e BRANCH \
          -e SHA1 \
          -e AWS_DEFAULT_REGION \
          -e IN_WHEEL_TEST \
          -e SHARD_NUMBER \
          -e JOB_BASE_NAME \
          -e TEST_CONFIG \
          -e NUM_TEST_SHARDS \
          -e PR_BODY \
          -e PYTORCH_RETRY_TEST_CASES \
          -e PR_LABELS \
          -e MAX_JOBS="$(nproc --ignore=2)" \
          -e SCCACHE_BUCKET \
          -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
          ${PROXY_ENV} \
          --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
          --ulimit stack=10485760:83886080 \
          --security-opt seccomp=unconfined \
          --cap-add=SYS_PTRACE \
          --ipc=host \
          --shm-size="${SHM_SIZE}" \
          --tty \
          --detach \
          --name="${container_name}" \
          --user jenkins \
          -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
          -w /var/lib/jenkins/workspace \
          "${DOCKER_IMAGE}"
        )
        docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Install render_test_results dependencies
      if: always()
      shell: bash
      run: |
        python3 -m pip install junitparser==2.1.1 rich==10.9.0
    - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
      if: always()
      shell: bash
      # Encoding is weird on windows, just try to default to utf-8 if possible
      env:
        PYTHONIOENCODING: "utf-8"
      run: |
        python3 tools/render_junit.py test/
    - name: Zip JSONs for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
      run: |
        # Remove any previous test jsons if they exist
        rm -f test-jsons-*.zip
        zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Downloaded JSONs on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: warn
        path:
          test-jsons-*.zip
    - name: Zip test reports for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
      run: |
        # Remove any previous test reports if they exist
        rm -f test-reports-*.zip
        zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Reports on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: error
        path:
          test-reports-*.zip
    - name: Display and upload test statistics (Click Me)
      if: always()
      # temporary hack: set CIRCLE_* vars, until we update
      # tools/stats/print_test_stats.py to natively support GitHub Actions
      env:
        AWS_DEFAULT_REGION: us-east-1
        BRANCH: ${{ steps.parse-ref.outputs.branch }}
        JOB_BASE_NAME: parallelnative-linux-xenial-py3.7-gcc5.4-test
        PR_NUMBER: ${{ github.event.pull_request.number }}
        SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
        TAG: ${{ steps.parse-ref.outputs.tag }}
        WORKFLOW_ID: '${{ github.run_id }}'
      shell: bash
      run: |
        python3 -m pip install -r requirements.txt
        python3 -m pip install boto3==1.19.12
        python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
    - name: Hold runner for 2 hours or until ssh sessions have drained
      # Always hold for active ssh sessions
      if: always()
      run: .github/scripts/wait_for_ssh_to_drain.sh
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Kill containers, clean up images
      if: always()
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
        # shellcheck disable=SC2046
        docker stop $(docker ps -q) || true
        # Prune all of the docker images
        docker system prune -af
  test_default_1_1:
    name: test (default, 1, 1, linux.2xlarge)
    needs: build
    runs-on: linux.2xlarge
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: parallelnative-linux-xenial-py3.7-gcc5.4-test
      TEST_CONFIG: default
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 1
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # deep clone, to allow use of git merge-base
        fetch-depth: 0
        submodules: recursive
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Determine shm-size
      run: |
        shm_size="1g"
        case "${BUILD_ENVIRONMENT}" in
          *cuda*)
            shm_size="2g"
            ;;
          *rocm*)
            shm_size="8g"
            ;;
        esac
        echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download PyTorch Build Artifacts
      with:
        name: ${{ env.BUILD_ENVIRONMENT }}
    - name: Unzip artifacts
      run: |
        unzip -o artifacts.zip
    - name: Output disk space left
      run: |
        sudo df -H
    - name: Parse ref
      id: parse-ref
      run: .github/scripts/parse_ref.py
    - name: Test
      env:
        PR_NUMBER: ${{ github.event.pull_request.number }}
        BRANCH: ${{ steps.parse-ref.outputs.branch }}
      # Time out the test phase after 240 minutes
      timeout-minutes: 240
      run: |
        set -x

        if [[ $TEST_CONFIG == 'multigpu' ]]; then
          TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
        elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
          TEST_COMMAND=.jenkins/caffe2/test.sh
        else
          TEST_COMMAND=.jenkins/pytorch/test.sh
        fi
        PROXY_ENV=
        # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
        # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
        # make it so that we shouldn't have to fully disable squid for XLA tests
        if [[ $TEST_CONFIG != 'xla' ]]; then
          # shellcheck disable=SC2089
          PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
        fi
        # detached container should get cleaned up by teardown_ec2_linux
        # TODO: Stop building test binaries as part of the build phase
        # Used for GPU_FLAG since that doesn't play nice
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BUILD_ENVIRONMENT \
          -e PR_NUMBER \
          -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
          -e GITHUB_ACTIONS \
          -e IN_CI \
          -e IS_GHA \
          -e BRANCH \
          -e SHA1 \
          -e AWS_DEFAULT_REGION \
          -e IN_WHEEL_TEST \
          -e SHARD_NUMBER \
          -e JOB_BASE_NAME \
          -e TEST_CONFIG \
          -e NUM_TEST_SHARDS \
          -e PR_BODY \
          -e PYTORCH_RETRY_TEST_CASES \
          -e PR_LABELS \
          -e MAX_JOBS="$(nproc --ignore=2)" \
          -e SCCACHE_BUCKET \
          -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
          ${PROXY_ENV} \
          --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
          --ulimit stack=10485760:83886080 \
          --security-opt seccomp=unconfined \
          --cap-add=SYS_PTRACE \
          --ipc=host \
          --shm-size="${SHM_SIZE}" \
          --tty \
          --detach \
          --name="${container_name}" \
          --user jenkins \
          -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
          -w /var/lib/jenkins/workspace \
          "${DOCKER_IMAGE}"
        )
        docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Install render_test_results dependencies
      if: always()
      shell: bash
      run: |
        python3 -m pip install junitparser==2.1.1 rich==10.9.0
    - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
      if: always()
      shell: bash
      # Encoding is weird on windows, just try to default to utf-8 if possible
      env:
        PYTHONIOENCODING: "utf-8"
      run: |
        python3 tools/render_junit.py test/
    - name: Zip JSONs for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-1-1-linux.2xlarge'
      run: |
        # Remove any previous test jsons if they exist
        rm -f test-jsons-*.zip
        zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Downloaded JSONs on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: warn
        path:
          test-jsons-*.zip
    - name: Zip test reports for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-1-1-linux.2xlarge'
      run: |
        # Remove any previous test reports if they exist
        rm -f test-reports-*.zip

809  .github/workflows/generated-periodic-linux-bionic-cuda11.5-py3.7-gcc7.yml  generated  vendored

@ -247,53 +247,17 @@ jobs:
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
|
||||
generate-test-matrix:
|
||||
test_distributed_1_1:
|
||||
name: test (distributed, 1, 1, linux.8xlarge.nvidia.gpu)
|
||||
needs: build
|
||||
runs-on: ubuntu-18.04
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
TEST_RUNNER_TYPE: linux.4xlarge.nvidia.gpu
|
||||
ENABLE_DISTRIBUTED_TEST: 1
|
||||
ENABLE_JIT_LEGACY_TEST: ''
|
||||
ENABLE_MULTIGPU_TEST: ''
|
||||
ENABLE_NOGPU_NO_AVX_TEST: ''
|
||||
ENABLE_NOGPU_NO_AVX2_TEST: ''
|
||||
ENABLE_SLOW_TEST: ''
|
||||
ENABLE_DOCS_TEST: ''
|
||||
ENABLE_BACKWARDS_COMPAT_TEST: ''
|
||||
ENABLE_XLA_TEST: ''
|
||||
ENABLE_NOARCH_TEST: ''
|
||||
NUM_TEST_SHARDS: 2
|
||||
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
|
||||
DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
|
||||
NOGPU_RUNNER_TYPE: linux.2xlarge
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
|
||||
container:
|
||||
image: python:3.9
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
run: pip install typing-extensions==3.10
|
||||
- name: Clone pytorch/pytorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
- name: Generating test matrix
|
||||
id: set-matrix
|
||||
run: .github/scripts/generate_pytorch_test_matrix.py
|
||||
|
||||
test:
|
||||
needs: [build, generate-test-matrix]
|
||||
strategy:
|
||||
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
|
||||
fail-fast: false
|
||||
runs-on: ${{ matrix.runner }}
|
||||
runs-on: linux.8xlarge.nvidia.gpu
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
|
||||
JOB_BASE_NAME: periodic-linux-bionic-cuda11.5-py3.7-gcc7-test
|
||||
TEST_CONFIG: ${{ matrix.config }}
|
||||
SHARD_NUMBER: ${{ matrix.shard }}
|
||||
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
|
||||
TEST_CONFIG: distributed
|
||||
SHARD_NUMBER: 1
|
||||
NUM_TEST_SHARDS: 1
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
|
|
@ -357,7 +321,6 @@ jobs:
|
|||
}
|
||||
retry docker pull "${DOCKER_IMAGE}"
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
|
||||
run: |
|
||||
bash .github/scripts/install_nvidia_utils_linux.sh
|
||||
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
|
||||
|
|
@ -473,7 +436,7 @@ jobs:
|
|||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
|
||||
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.8xlarge.nvidia.gpu'
|
||||
run: |
|
||||
# Remove any previous test jsons if they exist
|
||||
rm -f test-jsons-*.zip
|
||||
|
|
@ -489,7 +452,763 @@ jobs:
|
|||
- name: Zip test reports for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
|
||||
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.8xlarge.nvidia.gpu'
|
||||
run: |
|
||||
# Remove any previous test reports if they exist
|
||||
rm -f test-reports-*.zip
|
||||
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Reports on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path:
|
||||
test-reports-*.zip
|
||||
- name: Display and upload test statistics (Click Me)
|
||||
if: always()
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
JOB_BASE_NAME: periodic-linux-bionic-cuda11.5-py3.7-gcc7-test
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
WORKFLOW_ID: '${{ github.run_id }}'
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -r requirements.txt
|
||||
python3 -m pip install boto3==1.19.12
|
||||
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
|
||||
- name: Hold runner for 2 hours or until ssh sessions have drained
|
||||
# Always hold for active ssh sessions
|
||||
if: always()
|
||||
run: .github/scripts/wait_for_ssh_to_drain.sh
|
||||
- name: Chown workspace
|
||||
if: always()
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Kill containers, clean up images
|
||||
if: always()
|
||||
run: |
|
||||
# ignore expansion of "docker ps -q" since it could be empty
|
||||
# shellcheck disable=SC2046
|
||||
docker stop $(docker ps -q) || true
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
test_smoke_tests_1_1:
|
||||
name: test (smoke_tests, 1, 1, linux.4xlarge.nvidia.gpu)
|
||||
needs: build
|
||||
runs-on: linux.4xlarge.nvidia.gpu
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
|
||||
JOB_BASE_NAME: periodic-linux-bionic-cuda11.5-py3.7-gcc7-test
|
||||
TEST_CONFIG: smoke_tests
|
||||
SHARD_NUMBER: 1
|
||||
NUM_TEST_SHARDS: 1
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: Log in to ECR
|
||||
env:
|
||||
AWS_RETRY_MODE: standard
|
||||
AWS_MAX_ATTEMPTS: 5
|
||||
run: |
|
||||
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
|
||||
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
|
||||
- name: Chown workspace
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${ALPINE_IMAGE}"
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Clean workspace
|
||||
run: |
|
||||
rm -rf "${GITHUB_WORKSPACE}"
|
||||
mkdir "${GITHUB_WORKSPACE}"
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Preserve github env variables for use in docker
|
||||
run: |
|
||||
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
- name: Pull Docker image
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${DOCKER_IMAGE}"
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
run: |
|
||||
bash .github/scripts/install_nvidia_utils_linux.sh
|
||||
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
|
||||
- name: Determine shm-size
|
||||
run: |
|
||||
shm_size="1g"
|
||||
case "${BUILD_ENVIRONMENT}" in
|
||||
*cuda*)
|
||||
shm_size="2g"
|
||||
;;
|
||||
*rocm*)
|
||||
shm_size="8g"
|
||||
;;
|
||||
esac
|
||||
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
|
||||
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
|
||||
name: Download PyTorch Build Artifacts
|
||||
with:
|
||||
name: ${{ env.BUILD_ENVIRONMENT }}
|
||||
- name: Unzip artifacts
|
||||
run: |
|
||||
unzip -o artifacts.zip
|
||||
- name: Output disk space left
|
||||
run: |
|
||||
sudo df -H
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Test
|
||||
env:
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
# Time out the test phase after 240 minutes
|
||||
timeout-minutes: 240
|
||||
run: |
|
||||
set -x
|
||||
|
||||
if [[ $TEST_CONFIG == 'multigpu' ]]; then
|
||||
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
|
||||
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
|
||||
TEST_COMMAND=.jenkins/caffe2/test.sh
|
||||
else
|
||||
TEST_COMMAND=.jenkins/pytorch/test.sh
|
||||
fi
|
||||
PROXY_ENV=
|
||||
# NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
|
||||
# We should investigate whether or not there's a list of hostnames we can add to no_proxy to
|
||||
# make it so that we shouldn't have to fully disable squid for XLA tests
|
||||
if [[ $TEST_CONFIG != 'xla' ]]; then
|
||||
# shellcheck disable=SC2089
|
||||
PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
|
||||
fi
|
||||
# detached container should get cleaned up by teardown_ec2_linux
|
||||
# TODO: Stop building test binaries as part of the build phase
|
||||
# Used for GPU_FLAG since that doesn't play nice
|
||||
# shellcheck disable=SC2086,SC2090
|
||||
container_name=$(docker run \
|
||||
${GPU_FLAG:-} \
|
||||
-e BUILD_ENVIRONMENT \
|
||||
-e PR_NUMBER \
|
||||
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
|
||||
-e GITHUB_ACTIONS \
|
||||
-e IN_CI \
|
||||
-e IS_GHA \
|
||||
-e BRANCH \
|
||||
-e SHA1 \
|
||||
-e AWS_DEFAULT_REGION \
|
||||
-e IN_WHEEL_TEST \
|
||||
-e SHARD_NUMBER \
|
||||
-e JOB_BASE_NAME \
|
||||
-e TEST_CONFIG \
|
||||
-e NUM_TEST_SHARDS \
|
||||
-e PR_BODY \
|
||||
-e PYTORCH_RETRY_TEST_CASES \
|
||||
-e PR_LABELS \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e SCCACHE_BUCKET \
|
||||
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
|
||||
${PROXY_ENV} \
|
||||
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
|
||||
--ulimit stack=10485760:83886080 \
|
||||
--security-opt seccomp=unconfined \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--ipc=host \
|
||||
--shm-size="${SHM_SIZE}" \
|
||||
--tty \
|
||||
--detach \
|
||||
--name="${container_name}" \
|
||||
--user jenkins \
|
||||
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
|
||||
-w /var/lib/jenkins/workspace \
|
||||
"${DOCKER_IMAGE}"
|
||||
)
|
||||
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
|
||||
- name: Chown workspace
|
||||
if: always()
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Install render_test_results dependencies
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install junitparser==2.1.1 rich==10.9.0
|
||||
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
|
||||
if: always()
|
||||
shell: bash
|
||||
# Encoding is weird on windows, just try to default to utf-8 if possible
|
||||
env:
|
||||
PYTHONIOENCODING: "utf-8"
|
||||
run: |
|
||||
python3 tools/render_junit.py test/
|
||||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.4xlarge.nvidia.gpu'
|
||||
run: |
|
||||
# Remove any previous test jsons if they exist
|
||||
rm -f test-jsons-*.zip
|
||||
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Downloaded JSONs on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: warn
|
||||
path:
|
||||
test-jsons-*.zip
|
||||
- name: Zip test reports for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.4xlarge.nvidia.gpu'
|
||||
run: |
|
||||
# Remove any previous test reports if they exist
|
||||
rm -f test-reports-*.zip
|
||||
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Reports on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path:
|
||||
test-reports-*.zip
|
||||
- name: Display and upload test statistics (Click Me)
|
||||
if: always()
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
JOB_BASE_NAME: periodic-linux-bionic-cuda11.5-py3.7-gcc7-test
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
WORKFLOW_ID: '${{ github.run_id }}'
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -r requirements.txt
|
||||
python3 -m pip install boto3==1.19.12
|
||||
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
|
||||
- name: Hold runner for 2 hours or until ssh sessions have drained
|
||||
# Always hold for active ssh sessions
|
||||
if: always()
|
||||
run: .github/scripts/wait_for_ssh_to_drain.sh
|
||||
- name: Chown workspace
|
||||
if: always()
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Kill containers, clean up images
|
||||
if: always()
|
||||
run: |
|
||||
# ignore expansion of "docker ps -q" since it could be empty
|
||||
# shellcheck disable=SC2046
|
||||
docker stop $(docker ps -q) || true
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
test_default_1_1:
|
||||
name: test (default, 1, 2, linux.4xlarge.nvidia.gpu)
|
||||
needs: build
|
||||
runs-on: linux.4xlarge.nvidia.gpu
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
|
||||
JOB_BASE_NAME: periodic-linux-bionic-cuda11.5-py3.7-gcc7-test
|
||||
TEST_CONFIG: default
|
||||
SHARD_NUMBER: 1
|
||||
NUM_TEST_SHARDS: 2
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: Log in to ECR
|
||||
env:
|
||||
AWS_RETRY_MODE: standard
|
||||
AWS_MAX_ATTEMPTS: 5
|
||||
run: |
|
||||
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
|
||||
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
|
||||
- name: Chown workspace
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${ALPINE_IMAGE}"
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Clean workspace
|
||||
run: |
|
||||
rm -rf "${GITHUB_WORKSPACE}"
|
||||
mkdir "${GITHUB_WORKSPACE}"
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Preserve github env variables for use in docker
|
||||
run: |
|
||||
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
      - name: Pull Docker image
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
      - name: Determine shm-size
        run: |
          shm_size="1g"
          case "${BUILD_ENVIRONMENT}" in
            *cuda*)
              shm_size="2g"
              ;;
            *rocm*)
              shm_size="8g"
              ;;
          esac
          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
      - name: Unzip artifacts
        run: |
          unzip -o artifacts.zip
      - name: Output disk space left
        run: |
          sudo df -H
      - name: Parse ref
        id: parse-ref
        run: .github/scripts/parse_ref.py
      - name: Test
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        # Time out the test phase after 240 minutes
        timeout-minutes: 240
        run: |
          set -x

          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
          PROXY_ENV=
          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
          # make it so that we shouldn't have to fully disable squid for XLA tests
          if [[ $TEST_CONFIG != 'xla' ]]; then
            # shellcheck disable=SC2089
            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
          fi
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
          # Used for GPU_FLAG since that doesn't play nice
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e PR_NUMBER \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e GITHUB_ACTIONS \
            -e IN_CI \
            -e IS_GHA \
            -e BRANCH \
            -e SHA1 \
            -e AWS_DEFAULT_REGION \
            -e IN_WHEEL_TEST \
            -e SHARD_NUMBER \
            -e JOB_BASE_NAME \
            -e TEST_CONFIG \
            -e NUM_TEST_SHARDS \
            -e PR_BODY \
            -e PYTORCH_RETRY_TEST_CASES \
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            ${PROXY_ENV} \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --ulimit stack=10485760:83886080 \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --ipc=host \
            --shm-size="${SHM_SIZE}" \
            --tty \
            --detach \
            --name="${container_name}" \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Install render_test_results dependencies
        if: always()
        shell: bash
        run: |
          python3 -m pip install junitparser==2.1.1 rich==10.9.0
      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
        if: always()
        shell: bash
        # Encoding is weird on windows, just try to default to utf-8 if possible
        env:
          PYTHONIOENCODING: "utf-8"
        run: |
          python3 tools/render_junit.py test/
      - name: Zip JSONs for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test jsons if they exist
          rm -f test-jsons-*.zip
          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Downloaded JSONs on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: warn
          path:
            test-jsons-*.zip
      - name: Zip test reports for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test reports if they exist
          rm -f test-reports-*.zip
          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Reports on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: error
          path:
            test-reports-*.zip
      - name: Display and upload test statistics (Click Me)
        if: always()
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          AWS_DEFAULT_REGION: us-east-1
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          JOB_BASE_NAME: periodic-linux-bionic-cuda11.5-py3.7-gcc7-test
          PR_NUMBER: ${{ github.event.pull_request.number }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          TAG: ${{ steps.parse-ref.outputs.tag }}
          WORKFLOW_ID: '${{ github.run_id }}'
        shell: bash
        run: |
          python3 -m pip install -r requirements.txt
          python3 -m pip install boto3==1.19.12
          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
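Note: every network-touching step above inlines the same three-attempt retry helper rather than sharing it. A minimal standalone sketch of that pattern (bash; `some_flaky_command` is a stand-in, not part of the workflow):

    retry () {
      "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
    }
    retry some_flaky_command --with-args
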
  test_default_2_1:
    name: test (default, 2, 2, linux.4xlarge.nvidia.gpu)
    needs: build
    runs-on: linux.4xlarge.nvidia.gpu
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: periodic-linux-bionic-cuda11.5-py3.7-gcc7-test
      TEST_CONFIG: default
      SHARD_NUMBER: 2
      NUM_TEST_SHARDS: 2
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          # deep clone, to allow use of git merge-base
          fetch-depth: 0
          submodules: recursive
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
      - name: Pull Docker image
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
      - name: Determine shm-size
        run: |
          shm_size="1g"
          case "${BUILD_ENVIRONMENT}" in
            *cuda*)
              shm_size="2g"
              ;;
            *rocm*)
              shm_size="8g"
              ;;
          esac
          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
      - name: Unzip artifacts
        run: |
          unzip -o artifacts.zip
      - name: Output disk space left
        run: |
          sudo df -H
      - name: Parse ref
        id: parse-ref
        run: .github/scripts/parse_ref.py
      - name: Test
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        # Time out the test phase after 240 minutes
        timeout-minutes: 240
        run: |
          set -x

          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
          PROXY_ENV=
          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
          # make it so that we shouldn't have to fully disable squid for XLA tests
          if [[ $TEST_CONFIG != 'xla' ]]; then
            # shellcheck disable=SC2089
            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
          fi
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
          # Used for GPU_FLAG since that doesn't play nice
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e PR_NUMBER \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e GITHUB_ACTIONS \
            -e IN_CI \
            -e IS_GHA \
            -e BRANCH \
            -e SHA1 \
            -e AWS_DEFAULT_REGION \
            -e IN_WHEEL_TEST \
            -e SHARD_NUMBER \
            -e JOB_BASE_NAME \
            -e TEST_CONFIG \
            -e NUM_TEST_SHARDS \
            -e PR_BODY \
            -e PYTORCH_RETRY_TEST_CASES \
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            ${PROXY_ENV} \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --ulimit stack=10485760:83886080 \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --ipc=host \
            --shm-size="${SHM_SIZE}" \
            --tty \
            --detach \
            --name="${container_name}" \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Install render_test_results dependencies
        if: always()
        shell: bash
        run: |
          python3 -m pip install junitparser==2.1.1 rich==10.9.0
      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
        if: always()
        shell: bash
        # Encoding is weird on windows, just try to default to utf-8 if possible
        env:
          PYTHONIOENCODING: "utf-8"
        run: |
          python3 tools/render_junit.py test/
      - name: Zip JSONs for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test jsons if they exist
          rm -f test-jsons-*.zip
          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Downloaded JSONs on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: warn
          path:
            test-jsons-*.zip
      - name: Zip test reports for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test reports if they exist
          rm -f test-reports-*.zip
557
.github/workflows/generated-periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck.yml
generated
vendored
@@ -249,53 +249,17 @@ jobs:
          # Prune all of the docker images
          docker system prune -af

-  generate-test-matrix:
+  test_smoke_tests_1_1:
+    name: test (smoke_tests, 1, 1, linux.4xlarge.nvidia.gpu)
    needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: 240
-    env:
-      TEST_RUNNER_TYPE: linux.4xlarge.nvidia.gpu
-      ENABLE_DISTRIBUTED_TEST: ''
-      ENABLE_JIT_LEGACY_TEST: ''
-      ENABLE_MULTIGPU_TEST: ''
-      ENABLE_NOGPU_NO_AVX_TEST: ''
-      ENABLE_NOGPU_NO_AVX2_TEST: ''
-      ENABLE_SLOW_TEST: ''
-      ENABLE_DOCS_TEST: ''
-      ENABLE_BACKWARDS_COMPAT_TEST: ''
-      ENABLE_XLA_TEST: ''
-      ENABLE_NOARCH_TEST: ''
-      NUM_TEST_SHARDS: 2
-      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
-      DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
-      NOGPU_RUNNER_TYPE: linux.2xlarge
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-    container:
-      image: python:3.9
-    steps:
-      - name: Install dependencies
-        run: pip install typing-extensions==3.10
-      - name: Clone pytorch/pytorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-      - name: Generating test matrix
-        id: set-matrix
-        run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
+    runs-on: linux.4xlarge.nvidia.gpu
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck-test
-      TEST_CONFIG: ${{ matrix.config }}
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
+      TEST_CONFIG: smoke_tests
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 1
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
      - name: Display EC2 information
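Note: with the matrix job gone, the set of test jobs in this file is enumerable statically, which is the point of the change. For example (a sketch assuming yq v4 is installed; not part of the workflow):

    yq '.jobs | keys' .github/workflows/generated-periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck.yml
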
@@ -359,7 +323,6 @@ jobs:
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
@@ -475,7 +438,7 @@ jobs:
      - name: Zip JSONs for upload
        if: always()
        env:
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test jsons if they exist
          rm -f test-jsons-*.zip
@@ -491,7 +454,511 @@ jobs:
      - name: Zip test reports for upload
        if: always()
        env:
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test reports if they exist
          rm -f test-reports-*.zip
          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Reports on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: error
          path:
            test-reports-*.zip
      - name: Display and upload test statistics (Click Me)
        if: always()
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          AWS_DEFAULT_REGION: us-east-1
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          JOB_BASE_NAME: periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck-test
          PR_NUMBER: ${{ github.event.pull_request.number }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          TAG: ${{ steps.parse-ref.outputs.tag }}
          WORKFLOW_ID: '${{ github.run_id }}'
        shell: bash
        run: |
          python3 -m pip install -r requirements.txt
          python3 -m pip install boto3==1.19.12
          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
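Note: ${{ github.job }} expands to the job id, so the statically baked suffix now fully determines the artifact names. A sketch of how the names compose (bash; the literal value mirrors the smoke_tests job above and is derived, not quoted from the diff):

    FILE_SUFFIX='test_smoke_tests_1_1-smoke_tests-1-1-linux.4xlarge.nvidia.gpu'
    zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
    zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
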
  test_default_1_1:
    name: test (default, 1, 2, linux.4xlarge.nvidia.gpu)
    needs: build
    runs-on: linux.4xlarge.nvidia.gpu
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck-test
      TEST_CONFIG: default
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 2
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          # deep clone, to allow use of git merge-base
          fetch-depth: 0
          submodules: recursive
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
      - name: Pull Docker image
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
      - name: Determine shm-size
        run: |
          shm_size="1g"
          case "${BUILD_ENVIRONMENT}" in
            *cuda*)
              shm_size="2g"
              ;;
            *rocm*)
              shm_size="8g"
              ;;
          esac
          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
      - name: Unzip artifacts
        run: |
          unzip -o artifacts.zip
      - name: Output disk space left
        run: |
          sudo df -H
      - name: Parse ref
        id: parse-ref
        run: .github/scripts/parse_ref.py
      - name: Test
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        # Time out the test phase after 360 minutes
        timeout-minutes: 360
        run: |
          set -x

          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
          PROXY_ENV=
          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
          # make it so that we shouldn't have to fully disable squid for XLA tests
          if [[ $TEST_CONFIG != 'xla' ]]; then
            # shellcheck disable=SC2089
            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
          fi
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
          # Used for GPU_FLAG since that doesn't play nice
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e PR_NUMBER \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e GITHUB_ACTIONS \
            -e IN_CI \
            -e IS_GHA \
            -e BRANCH \
            -e SHA1 \
            -e AWS_DEFAULT_REGION \
            -e IN_WHEEL_TEST \
            -e SHARD_NUMBER \
            -e JOB_BASE_NAME \
            -e TEST_CONFIG \
            -e NUM_TEST_SHARDS \
            -e PR_BODY \
            -e PYTORCH_RETRY_TEST_CASES \
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            ${PROXY_ENV} \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --ulimit stack=10485760:83886080 \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --ipc=host \
            --shm-size="${SHM_SIZE}" \
            --tty \
            --detach \
            --name="${container_name}" \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Install render_test_results dependencies
        if: always()
        shell: bash
        run: |
          python3 -m pip install junitparser==2.1.1 rich==10.9.0
      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
        if: always()
        shell: bash
        # Encoding is weird on windows, just try to default to utf-8 if possible
        env:
          PYTHONIOENCODING: "utf-8"
        run: |
          python3 tools/render_junit.py test/
      - name: Zip JSONs for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test jsons if they exist
          rm -f test-jsons-*.zip
          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Downloaded JSONs on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: warn
          path:
            test-jsons-*.zip
      - name: Zip test reports for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test reports if they exist
          rm -f test-reports-*.zip
          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Reports on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: error
          path:
            test-reports-*.zip
      - name: Display and upload test statistics (Click Me)
        if: always()
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          AWS_DEFAULT_REGION: us-east-1
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          JOB_BASE_NAME: periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck-test
          PR_NUMBER: ${{ github.event.pull_request.number }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          TAG: ${{ steps.parse-ref.outputs.tag }}
          WORKFLOW_ID: '${{ github.run_id }}'
        shell: bash
        run: |
          python3 -m pip install -r requirements.txt
          python3 -m pip install boto3==1.19.12
          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
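Note: the Test step picks its entry script from TEST_CONFIG and BUILD_ENVIRONMENT. The dispatch, pulled out as a runnable sketch (bash; the two variables are assumed to be set by the job, so defaults are supplied here):

    TEST_CONFIG="${TEST_CONFIG:-default}"
    BUILD_ENVIRONMENT="${BUILD_ENVIRONMENT:-}"
    if [[ $TEST_CONFIG == 'multigpu' ]]; then
      TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
    elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
      TEST_COMMAND=.jenkins/caffe2/test.sh
    else
      TEST_COMMAND=.jenkins/pytorch/test.sh
    fi
    echo "would run: ${TEST_COMMAND}"
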
  test_default_2_1:
    name: test (default, 2, 2, linux.4xlarge.nvidia.gpu)
    needs: build
    runs-on: linux.4xlarge.nvidia.gpu
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck-test
      TEST_CONFIG: default
      SHARD_NUMBER: 2
      NUM_TEST_SHARDS: 2
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          # deep clone, to allow use of git merge-base
          fetch-depth: 0
          submodules: recursive
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
      - name: Pull Docker image
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
      - name: Determine shm-size
        run: |
          shm_size="1g"
          case "${BUILD_ENVIRONMENT}" in
            *cuda*)
              shm_size="2g"
              ;;
            *rocm*)
              shm_size="8g"
              ;;
          esac
          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
      - name: Unzip artifacts
        run: |
          unzip -o artifacts.zip
      - name: Output disk space left
        run: |
          sudo df -H
      - name: Parse ref
        id: parse-ref
        run: .github/scripts/parse_ref.py
      - name: Test
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        # Time out the test phase after 360 minutes
        timeout-minutes: 360
        run: |
          set -x

          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
          PROXY_ENV=
          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
          # make it so that we shouldn't have to fully disable squid for XLA tests
          if [[ $TEST_CONFIG != 'xla' ]]; then
            # shellcheck disable=SC2089
            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
          fi
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
          # Used for GPU_FLAG since that doesn't play nice
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e PR_NUMBER \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e GITHUB_ACTIONS \
            -e IN_CI \
            -e IS_GHA \
            -e BRANCH \
            -e SHA1 \
            -e AWS_DEFAULT_REGION \
            -e IN_WHEEL_TEST \
            -e SHARD_NUMBER \
            -e JOB_BASE_NAME \
            -e TEST_CONFIG \
            -e NUM_TEST_SHARDS \
            -e PR_BODY \
            -e PYTORCH_RETRY_TEST_CASES \
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            ${PROXY_ENV} \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --ulimit stack=10485760:83886080 \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --ipc=host \
            --shm-size="${SHM_SIZE}" \
            --tty \
            --detach \
            --name="${container_name}" \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Install render_test_results dependencies
        if: always()
        shell: bash
        run: |
          python3 -m pip install junitparser==2.1.1 rich==10.9.0
      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
        if: always()
        shell: bash
        # Encoding is weird on windows, just try to default to utf-8 if possible
        env:
          PYTHONIOENCODING: "utf-8"
        run: |
          python3 tools/render_junit.py test/
      - name: Zip JSONs for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test jsons if they exist
          rm -f test-jsons-*.zip
          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Downloaded JSONs on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: warn
          path:
            test-jsons-*.zip
      - name: Zip test reports for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test reports if they exist
          rm -f test-reports-*.zip
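Note: GitHub-provided variables reach the test container via an env file rather than one -e flag per variable. A standalone approximation of the "Preserve github env variables" step (bash; alpine is a stand-in for the workflow's ${ALPINE_IMAGE}):

    env | grep '^GITHUB' > "/tmp/github_env_$$"
    docker run --rm --env-file="/tmp/github_env_$$" alpine env | grep '^GITHUB'
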
809
.github/workflows/generated-periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug.yml
generated
vendored
@@ -248,53 +248,17 @@ jobs:
          # Prune all of the docker images
          docker system prune -af

-  generate-test-matrix:
+  test_distributed_1_1:
+    name: test (distributed, 1, 1, linux.8xlarge.nvidia.gpu)
    needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: 240
-    env:
-      TEST_RUNNER_TYPE: linux.4xlarge.nvidia.gpu
-      ENABLE_DISTRIBUTED_TEST: 1
-      ENABLE_JIT_LEGACY_TEST: ''
-      ENABLE_MULTIGPU_TEST: ''
-      ENABLE_NOGPU_NO_AVX_TEST: ''
-      ENABLE_NOGPU_NO_AVX2_TEST: ''
-      ENABLE_SLOW_TEST: ''
-      ENABLE_DOCS_TEST: ''
-      ENABLE_BACKWARDS_COMPAT_TEST: ''
-      ENABLE_XLA_TEST: ''
-      ENABLE_NOARCH_TEST: ''
-      NUM_TEST_SHARDS: 2
-      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
-      DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
-      NOGPU_RUNNER_TYPE: linux.2xlarge
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-    container:
-      image: python:3.9
-    steps:
-      - name: Install dependencies
-        run: pip install typing-extensions==3.10
-      - name: Clone pytorch/pytorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-      - name: Generating test matrix
-        id: set-matrix
-        run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
+    runs-on: linux.8xlarge.nvidia.gpu
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test
-      TEST_CONFIG: ${{ matrix.config }}
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
+      TEST_CONFIG: distributed
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 1
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
      - name: Display EC2 information
@@ -358,7 +322,6 @@ jobs:
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
@@ -474,7 +437,7 @@ jobs:
      - name: Zip JSONs for upload
        if: always()
        env:
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+          FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.8xlarge.nvidia.gpu'
        run: |
          # Remove any previous test jsons if they exist
          rm -f test-jsons-*.zip
@@ -490,7 +453,763 @@ jobs:
      - name: Zip test reports for upload
        if: always()
        env:
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+          FILE_SUFFIX: '${{ github.job }}-distributed-1-1-linux.8xlarge.nvidia.gpu'
        run: |
          # Remove any previous test reports if they exist
          rm -f test-reports-*.zip
          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Reports on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: error
          path:
            test-reports-*.zip
      - name: Display and upload test statistics (Click Me)
        if: always()
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          AWS_DEFAULT_REGION: us-east-1
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test
          PR_NUMBER: ${{ github.event.pull_request.number }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          TAG: ${{ steps.parse-ref.outputs.tag }}
          WORKFLOW_ID: '${{ github.run_id }}'
        shell: bash
        run: |
          python3 -m pip install -r requirements.txt
          python3 -m pip install boto3==1.19.12
          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
  test_smoke_tests_1_1:
    name: test (smoke_tests, 1, 1, linux.4xlarge.nvidia.gpu)
    needs: build
    runs-on: linux.4xlarge.nvidia.gpu
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test
      TEST_CONFIG: smoke_tests
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 1
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          # deep clone, to allow use of git merge-base
          fetch-depth: 0
          submodules: recursive
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
      - name: Pull Docker image
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
      - name: Determine shm-size
        run: |
          shm_size="1g"
          case "${BUILD_ENVIRONMENT}" in
            *cuda*)
              shm_size="2g"
              ;;
            *rocm*)
              shm_size="8g"
              ;;
          esac
          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
      - name: Unzip artifacts
        run: |
          unzip -o artifacts.zip
      - name: Output disk space left
        run: |
          sudo df -H
      - name: Parse ref
        id: parse-ref
        run: .github/scripts/parse_ref.py
      - name: Test
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        # Time out the test phase after 240 minutes
        timeout-minutes: 240
        run: |
          set -x

          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
          PROXY_ENV=
          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
          # make it so that we shouldn't have to fully disable squid for XLA tests
          if [[ $TEST_CONFIG != 'xla' ]]; then
            # shellcheck disable=SC2089
            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
          fi
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
          # Used for GPU_FLAG since that doesn't play nice
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e PR_NUMBER \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e GITHUB_ACTIONS \
            -e IN_CI \
            -e IS_GHA \
            -e BRANCH \
            -e SHA1 \
            -e AWS_DEFAULT_REGION \
            -e IN_WHEEL_TEST \
            -e SHARD_NUMBER \
            -e JOB_BASE_NAME \
            -e TEST_CONFIG \
            -e NUM_TEST_SHARDS \
            -e PR_BODY \
            -e PYTORCH_RETRY_TEST_CASES \
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            ${PROXY_ENV} \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --ulimit stack=10485760:83886080 \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --ipc=host \
            --shm-size="${SHM_SIZE}" \
            --tty \
            --detach \
            --name="${container_name}" \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Install render_test_results dependencies
        if: always()
        shell: bash
        run: |
          python3 -m pip install junitparser==2.1.1 rich==10.9.0
      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
        if: always()
        shell: bash
        # Encoding is weird on windows, just try to default to utf-8 if possible
        env:
          PYTHONIOENCODING: "utf-8"
        run: |
          python3 tools/render_junit.py test/
      - name: Zip JSONs for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test jsons if they exist
          rm -f test-jsons-*.zip
          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Downloaded JSONs on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: warn
          path:
            test-jsons-*.zip
      - name: Zip test reports for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test reports if they exist
          rm -f test-reports-*.zip
          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Reports on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: error
          path:
            test-reports-*.zip
      - name: Display and upload test statistics (Click Me)
        if: always()
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          AWS_DEFAULT_REGION: us-east-1
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test
          PR_NUMBER: ${{ github.event.pull_request.number }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          TAG: ${{ steps.parse-ref.outputs.tag }}
          WORKFLOW_ID: '${{ github.run_id }}'
        shell: bash
        run: |
          python3 -m pip install -r requirements.txt
          python3 -m pip install boto3==1.19.12
          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af
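Note: the repeated "Chown workspace" steps exist because the test container runs as the jenkins user, so files it writes into the bind-mounted workspace end up owned by that uid; an alpine one-liner hands ownership back to the runner user. Standalone sketch (bash; alpine stands in for the workflow's ${ALPINE_IMAGE}):

    docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
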
  test_default_1_1:
    name: test (default, 1, 2, linux.4xlarge.nvidia.gpu)
    needs: build
    runs-on: linux.4xlarge.nvidia.gpu
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test
      TEST_CONFIG: default
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 2
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
      - name: Chown workspace
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          # deep clone, to allow use of git merge-base
          fetch-depth: 0
          submodules: recursive
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
      - name: Pull Docker image
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
      - name: Determine shm-size
        run: |
          shm_size="1g"
          case "${BUILD_ENVIRONMENT}" in
            *cuda*)
              shm_size="2g"
              ;;
            *rocm*)
              shm_size="8g"
              ;;
          esac
          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
      - name: Unzip artifacts
        run: |
          unzip -o artifacts.zip
      - name: Output disk space left
        run: |
          sudo df -H
      - name: Parse ref
        id: parse-ref
        run: .github/scripts/parse_ref.py
      - name: Test
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        # Time out the test phase after 240 minutes
        timeout-minutes: 240
        run: |
          set -x

          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
          PROXY_ENV=
          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
          # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
          # make it so that we shouldn't have to fully disable squid for XLA tests
          if [[ $TEST_CONFIG != 'xla' ]]; then
            # shellcheck disable=SC2089
            PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
          fi
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
          # Used for GPU_FLAG since that doesn't play nice
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e PR_NUMBER \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e GITHUB_ACTIONS \
            -e IN_CI \
            -e IS_GHA \
            -e BRANCH \
            -e SHA1 \
            -e AWS_DEFAULT_REGION \
            -e IN_WHEEL_TEST \
            -e SHARD_NUMBER \
            -e JOB_BASE_NAME \
            -e TEST_CONFIG \
            -e NUM_TEST_SHARDS \
            -e PR_BODY \
            -e PYTORCH_RETRY_TEST_CASES \
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            ${PROXY_ENV} \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --ulimit stack=10485760:83886080 \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --ipc=host \
            --shm-size="${SHM_SIZE}" \
            --tty \
            --detach \
            --name="${container_name}" \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Install render_test_results dependencies
        if: always()
        shell: bash
        run: |
          python3 -m pip install junitparser==2.1.1 rich==10.9.0
      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
        if: always()
        shell: bash
        # Encoding is weird on windows, just try to default to utf-8 if possible
        env:
          PYTHONIOENCODING: "utf-8"
        run: |
          python3 tools/render_junit.py test/
      - name: Zip JSONs for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test jsons if they exist
          rm -f test-jsons-*.zip
          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Downloaded JSONs on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: warn
          path:
            test-jsons-*.zip
      - name: Zip test reports for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-default-1-2-linux.4xlarge.nvidia.gpu'
        run: |
          # Remove any previous test reports if they exist
          rm -f test-reports-*.zip
          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Reports on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: error
          path:
            test-reports-*.zip
      - name: Display and upload test statistics (Click Me)
        if: always()
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          AWS_DEFAULT_REGION: us-east-1
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test
          PR_NUMBER: ${{ github.event.pull_request.number }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          TAG: ${{ steps.parse-ref.outputs.tag }}
          WORKFLOW_ID: '${{ github.run_id }}'
        shell: bash
        run: |
          python3 -m pip install -r requirements.txt
          python3 -m pip install boto3==1.19.12
          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af

  test_default_2_1:
    name: test (default, 2, 2, linux.4xlarge.nvidia.gpu)
    needs: build
    runs-on: linux.4xlarge.nvidia.gpu
    timeout-minutes: 240
    env:
      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
      JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test
      TEST_CONFIG: default
      SHARD_NUMBER: 2
      NUM_TEST_SHARDS: 2
      PR_BODY: ${{ github.event.pull_request.body }}
    steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Log in to ECR
      env:
        AWS_RETRY_MODE: standard
        AWS_MAX_ATTEMPTS: 5
      run: |
        AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
          --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
    - name: Chown workspace
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${ALPINE_IMAGE}"
        # Ensure the working directory gets chowned back to the current user
        docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Clean workspace
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir "${GITHUB_WORKSPACE}"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Preserve github env variables for use in docker
      run: |
        env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # deep clone, to allow use of git merge-base
        fetch-depth: 0
        submodules: recursive
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Pull Docker image
      run: |
        retry () {
          "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
        }
        retry docker pull "${DOCKER_IMAGE}"
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      run: |
        bash .github/scripts/install_nvidia_utils_linux.sh
        echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
    - name: Determine shm-size
      run: |
        shm_size="1g"
        case "${BUILD_ENVIRONMENT}" in
          *cuda*)
            shm_size="2g"
            ;;
          *rocm*)
            shm_size="8g"
            ;;
        esac
        echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download PyTorch Build Artifacts
      with:
        name: ${{ env.BUILD_ENVIRONMENT }}
    - name: Unzip artifacts
      run: |
        unzip -o artifacts.zip
    - name: Output disk space left
      run: |
        sudo df -H
    - name: Parse ref
      id: parse-ref
      run: .github/scripts/parse_ref.py
    - name: Test
      env:
        PR_NUMBER: ${{ github.event.pull_request.number }}
        BRANCH: ${{ steps.parse-ref.outputs.branch }}
      # Time out the test phase after 240 minutes
      timeout-minutes: 240
      run: |
        set -x

        if [[ $TEST_CONFIG == 'multigpu' ]]; then
          TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
        elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
          TEST_COMMAND=.jenkins/caffe2/test.sh
        else
          TEST_COMMAND=.jenkins/pytorch/test.sh
        fi
        PROXY_ENV=
        # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
        # We should investigate whether or not there's a list of hostnames we can add to no_proxy to
        # make it so that we shouldn't have to fully disable squid for XLA tests
        if [[ $TEST_CONFIG != 'xla' ]]; then
          # shellcheck disable=SC2089
          PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
        fi
        # detached container should get cleaned up by teardown_ec2_linux
        # TODO: Stop building test binaries as part of the build phase
        # Used for GPU_FLAG since that doesn't play nice
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BUILD_ENVIRONMENT \
          -e PR_NUMBER \
          -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
          -e GITHUB_ACTIONS \
          -e IN_CI \
          -e IS_GHA \
          -e BRANCH \
          -e SHA1 \
          -e AWS_DEFAULT_REGION \
          -e IN_WHEEL_TEST \
          -e SHARD_NUMBER \
          -e JOB_BASE_NAME \
          -e TEST_CONFIG \
          -e NUM_TEST_SHARDS \
          -e PR_BODY \
          -e PYTORCH_RETRY_TEST_CASES \
          -e PR_LABELS \
          -e MAX_JOBS="$(nproc --ignore=2)" \
          -e SCCACHE_BUCKET \
          -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
          ${PROXY_ENV} \
          --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
          --ulimit stack=10485760:83886080 \
          --security-opt seccomp=unconfined \
          --cap-add=SYS_PTRACE \
          --ipc=host \
          --shm-size="${SHM_SIZE}" \
          --tty \
          --detach \
          --name="${container_name}" \
          --user jenkins \
          -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
          -w /var/lib/jenkins/workspace \
          "${DOCKER_IMAGE}"
        )
        docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
    - name: Chown workspace
      if: always()
      run: |
        # Ensure the working directory gets chowned back to the current user
        docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
    - name: Install render_test_results dependencies
      if: always()
      shell: bash
      run: |
        python3 -m pip install junitparser==2.1.1 rich==10.9.0
    - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
      if: always()
      shell: bash
      # Encoding is weird on windows, just try to default to utf-8 if possible
      env:
        PYTHONIOENCODING: "utf-8"
      run: |
        python3 tools/render_junit.py test/
    - name: Zip JSONs for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.4xlarge.nvidia.gpu'
      run: |
        # Remove any previous test jsons if they exist
        rm -f test-jsons-*.zip
        zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Downloaded JSONs on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: warn
        path:
          test-jsons-*.zip
    - name: Zip test reports for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-2-2-linux.4xlarge.nvidia.gpu'
      run: |
        # Remove any previous test reports if they exist
        rm -f test-reports-*.zip
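
For context: each job above is now a fully static block, and the shard fan-out that generate-test-matrix used to compute at CI time is baked in when the workflow file is generated. A minimal sketch of that expansion, assuming a hypothetical helper (this is not the actual generate_ci_workflows.py code):

    # Hypothetical sketch: expand (config, shard) pairs into statically
    # named jobs at generation time, so the yaml itself lists every job.
    from typing import Dict


    def static_test_jobs(configs: Dict[str, int], runner: str) -> Dict[str, dict]:
        jobs = {}
        for config, num_shards in configs.items():
            for shard in range(1, num_shards + 1):
                # one concrete job per shard, with literal env values
                job_id = f"test_{config}_{shard}_{num_shards}"
                jobs[job_id] = {
                    "name": f"test ({config}, {shard}, {num_shards}, {runner})",
                    "needs": "build",
                    "runs-on": runner,
                    "env": {
                        "TEST_CONFIG": config,
                        "SHARD_NUMBER": shard,
                        "NUM_TEST_SHARDS": num_shards,
                    },
                }
        return jobs


    print(static_test_jobs({"default": 2, "distributed": 1}, "linux.4xlarge.nvidia.gpu"))

Because the expansion happens before the yaml is committed, the exact set of test jobs is now visible by reading the workflow file, which is the point of this change.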

522 .github/workflows/generated-periodic-win-vs2019-cuda11.1-py3.yml generated vendored

@@ -131,47 +131,19 @@ jobs:
      run: |
        rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
        rm -rf ./*

-  generate-test-matrix:
-    needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: 240
-    env:
-      TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
-      NUM_TEST_SHARDS: 2
-      NUM_TEST_SHARDS_ON_PULL_REQUEST: 2
-      NOGPU_RUNNER_TYPE: windows.4xlarge
-      ENABLE_FORCE_ON_CPU_TEST: ''
-      RUN_SMOKE_TESTS_ONLY_ON_PR: False
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-    container:
-      image: python:3.9
-    steps:
-    - name: Install dependencies
-      run: pip install typing-extensions==3.10
-    - name: Clone pytorch/pytorch
-      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-    - name: Generating test matrix
-      id: set-matrix
-      run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
  test_distributed_1_1:
    name: test (distributed, 1, 1, windows.8xlarge.nvidia.gpu)
    timeout-minutes: 240
    env:
      JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
-      TEST_CONFIG: ${{ matrix.config }}
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 1
      TEST_CONFIG: distributed
      http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      PR_BODY: ${{ github.event.pull_request.body }}
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
    needs: build
    runs-on: windows.8xlarge.nvidia.gpu
    steps:
    - name: Display EC2 information
      shell: bash
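
For reference, the deleted mechanism worked roughly like this (a simplified sketch, not the real .github/scripts/generate_pytorch_test_matrix.py): a container job printed a JSON matrix through the ::set-output protocol, and the single `test` job consumed it via fromJson, as shown in the removed lines above.

    # Simplified sketch of the CI-time matrix emitter being deleted here.
    # The real script read many more env vars; this only shows the shape.
    import json
    import os

    num_shards = int(os.environ.get("NUM_TEST_SHARDS", "1"))
    runner = os.environ.get("TEST_RUNNER_TYPE", "windows.8xlarge.nvidia.gpu")

    matrix = {
        "include": [
            {"config": "default", "shard": shard, "num_shards": num_shards, "runner": runner}
            for shard in range(1, num_shards + 1)
        ]
    }
    # GitHub Actions picked this line up as the step output named `matrix`
    print(f"::set-output name=matrix::{json.dumps(matrix)}")

Since all of these inputs were known before CI ran, nothing here actually needed to be computed at runtime.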
@@ -206,12 +178,10 @@ jobs:
      run: |
        .\.circleci\scripts\vs_install.ps1
    - name: Install Cuda
-      if: ${{ matrix.config != 'force_on_cpu' }}
      shell: bash
      run: |
        .circleci/scripts/windows_cuda_install.sh
    - name: Install Cudnn
-      if: ${{ matrix.config != 'force_on_cpu' }}
      shell: bash
      run: |
        .circleci/scripts/windows_cudnn_install.sh
@@ -240,7 +210,7 @@ jobs:
    - name: Zip JSONs for upload
      if: always()
      env:
-        FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
        FILE_SUFFIX: '${{ github.job }}-distributed-1-1-windows.8xlarge.nvidia.gpu'
      shell: powershell
      run: |
        # -ir => recursive include all files in pattern
@@ -256,7 +226,481 @@ jobs:
    - name: Zip test reports for upload
      if: always()
      env:
-        FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
        FILE_SUFFIX: '${{ github.job }}-distributed-1-1-windows.8xlarge.nvidia.gpu'
      shell: powershell
      run: |
        # -ir => recursive include all files in pattern
        7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Reports on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: error
        path:
          test-reports-*.zip
    - name: Install render_test_results dependencies
      if: always()
      shell: bash
      run: |
        python3 -m pip install junitparser==2.1.1 rich==10.9.0
    - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
      if: always()
      shell: bash
      # Encoding is weird on windows, just try to default to utf-8 if possible
      env:
        PYTHONIOENCODING: "utf-8"
      run: |
        python3 tools/render_junit.py test/
    - name: Wait until all sessions have drained
      shell: powershell
      if: always()
      timeout-minutes: 120
      run: |
        .github\scripts\wait_for_ssh_to_drain.ps1
    - name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
      shell: powershell
      if: always()
      run: |
        .github\scripts\kill_active_ssh_sessions.ps1
    - name: Parse ref
      id: parse-ref
      run: .github/scripts/parse_ref.py
    - name: Display and upload test statistics (Click Me)
      if: always()
      # temporary hack: set CIRCLE_* vars, until we update
      # tools/stats/print_test_stats.py to natively support GitHub Actions
      env:
        AWS_DEFAULT_REGION: us-east-1
        BRANCH: ${{ steps.parse-ref.outputs.branch }}
        JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test
        PR_NUMBER: ${{ github.event.pull_request.number }}
        SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
        TAG: ${{ steps.parse-ref.outputs.tag }}
        WORKFLOW_ID: '${{ github.run_id }}'
      shell: bash
      run: |
        python3 -m pip install -r requirements.txt
        python3 -m pip install boto3==1.19.12
        python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
    - name: Cleanup workspace
      if: always()
      shell: bash
      # Should remove the entirety of pytorch-${{ github.run_id }}
      run: |
        rm -rf ./*

  test_smoke_tests_1_1:
    name: test (smoke_tests, 1, 1, windows.8xlarge.nvidia.gpu)
    timeout-minutes: 240
    env:
      JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 1
      TEST_CONFIG: smoke_tests
      http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      PR_BODY: ${{ github.event.pull_request.body }}
    needs: build
    runs-on: windows.8xlarge.nvidia.gpu
    steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # deep clone, to allow use of git merge-base
        fetch-depth: 0
        submodules: recursive
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Install Visual Studio 2019 toolchain
      shell: powershell
      run: |
        .\.circleci\scripts\vs_install.ps1
    - name: Install Cuda
      shell: bash
      run: |
        .circleci/scripts/windows_cuda_install.sh
    - name: Install Cudnn
      shell: bash
      run: |
        .circleci/scripts/windows_cudnn_install.sh
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download PyTorch Build Artifacts
      with:
        name: ${{ env.BUILD_ENVIRONMENT }}
        path: C:\${{ github.run_id }}\build-results
    - name: Check build-results folder
      shell: powershell
      run: |
        tree /F C:\$Env:GITHUB_RUN_ID\build-results
    # Needed for coverage in win-test.sh
    - uses: actions/setup-python@v2
      name: Setup Python3
      with:
        python-version: '3.x'
    - name: Test
      shell: bash
      env:
        PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
      # Time out the test phase after 3.5 hours
      timeout-minutes: 210
      run: |
        .jenkins/pytorch/win-test.sh
    - name: Zip JSONs for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-windows.8xlarge.nvidia.gpu'
      shell: powershell
      run: |
        # -ir => recursive include all files in pattern
        7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Downloaded JSONs on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: warn
        path:
          test-jsons-*.zip
    - name: Zip test reports for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-windows.8xlarge.nvidia.gpu'
      shell: powershell
      run: |
        # -ir => recursive include all files in pattern
        7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Reports on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: error
        path:
          test-reports-*.zip
    - name: Install render_test_results dependencies
      if: always()
      shell: bash
      run: |
        python3 -m pip install junitparser==2.1.1 rich==10.9.0
    - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
      if: always()
      shell: bash
      # Encoding is weird on windows, just try to default to utf-8 if possible
      env:
        PYTHONIOENCODING: "utf-8"
      run: |
        python3 tools/render_junit.py test/
    - name: Wait until all sessions have drained
      shell: powershell
      if: always()
      timeout-minutes: 120
      run: |
        .github\scripts\wait_for_ssh_to_drain.ps1
    - name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
      shell: powershell
      if: always()
      run: |
        .github\scripts\kill_active_ssh_sessions.ps1
    - name: Parse ref
      id: parse-ref
      run: .github/scripts/parse_ref.py
    - name: Display and upload test statistics (Click Me)
      if: always()
      # temporary hack: set CIRCLE_* vars, until we update
      # tools/stats/print_test_stats.py to natively support GitHub Actions
      env:
        AWS_DEFAULT_REGION: us-east-1
        BRANCH: ${{ steps.parse-ref.outputs.branch }}
        JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test
        PR_NUMBER: ${{ github.event.pull_request.number }}
        SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
        TAG: ${{ steps.parse-ref.outputs.tag }}
        WORKFLOW_ID: '${{ github.run_id }}'
      shell: bash
      run: |
        python3 -m pip install -r requirements.txt
        python3 -m pip install boto3==1.19.12
        python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
    - name: Cleanup workspace
      if: always()
      shell: bash
      # Should remove the entirety of pytorch-${{ github.run_id }}
      run: |
        rm -rf ./*

  test_default_1_1:
    name: test (default, 1, 2, windows.8xlarge.nvidia.gpu)
    timeout-minutes: 240
    env:
      JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 2
      TEST_CONFIG: default
      http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      PR_BODY: ${{ github.event.pull_request.body }}
    needs: build
    runs-on: windows.8xlarge.nvidia.gpu
    steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # deep clone, to allow use of git merge-base
        fetch-depth: 0
        submodules: recursive
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Install Visual Studio 2019 toolchain
      shell: powershell
      run: |
        .\.circleci\scripts\vs_install.ps1
    - name: Install Cuda
      shell: bash
      run: |
        .circleci/scripts/windows_cuda_install.sh
    - name: Install Cudnn
      shell: bash
      run: |
        .circleci/scripts/windows_cudnn_install.sh
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download PyTorch Build Artifacts
      with:
        name: ${{ env.BUILD_ENVIRONMENT }}
        path: C:\${{ github.run_id }}\build-results
    - name: Check build-results folder
      shell: powershell
      run: |
        tree /F C:\$Env:GITHUB_RUN_ID\build-results
    # Needed for coverage in win-test.sh
    - uses: actions/setup-python@v2
      name: Setup Python3
      with:
        python-version: '3.x'
    - name: Test
      shell: bash
      env:
        PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
      # Time out the test phase after 3.5 hours
      timeout-minutes: 210
      run: |
        .jenkins/pytorch/win-test.sh
    - name: Zip JSONs for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-1-2-windows.8xlarge.nvidia.gpu'
      shell: powershell
      run: |
        # -ir => recursive include all files in pattern
        7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Downloaded JSONs on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: warn
        path:
          test-jsons-*.zip
    - name: Zip test reports for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-1-2-windows.8xlarge.nvidia.gpu'
      shell: powershell
      run: |
        # -ir => recursive include all files in pattern
        7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Reports on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: error
        path:
          test-reports-*.zip
    - name: Install render_test_results dependencies
      if: always()
      shell: bash
      run: |
        python3 -m pip install junitparser==2.1.1 rich==10.9.0
    - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
      if: always()
      shell: bash
      # Encoding is weird on windows, just try to default to utf-8 if possible
      env:
        PYTHONIOENCODING: "utf-8"
      run: |
        python3 tools/render_junit.py test/
    - name: Wait until all sessions have drained
      shell: powershell
      if: always()
      timeout-minutes: 120
      run: |
        .github\scripts\wait_for_ssh_to_drain.ps1
    - name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
      shell: powershell
      if: always()
      run: |
        .github\scripts\kill_active_ssh_sessions.ps1
    - name: Parse ref
      id: parse-ref
      run: .github/scripts/parse_ref.py
    - name: Display and upload test statistics (Click Me)
      if: always()
      # temporary hack: set CIRCLE_* vars, until we update
      # tools/stats/print_test_stats.py to natively support GitHub Actions
      env:
        AWS_DEFAULT_REGION: us-east-1
        BRANCH: ${{ steps.parse-ref.outputs.branch }}
        JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test
        PR_NUMBER: ${{ github.event.pull_request.number }}
        SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
        TAG: ${{ steps.parse-ref.outputs.tag }}
        WORKFLOW_ID: '${{ github.run_id }}'
      shell: bash
      run: |
        python3 -m pip install -r requirements.txt
        python3 -m pip install boto3==1.19.12
        python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
    - name: Cleanup workspace
      if: always()
      shell: bash
      # Should remove the entirety of pytorch-${{ github.run_id }}
      run: |
        rm -rf ./*

  test_default_2_1:
    name: test (default, 2, 2, windows.8xlarge.nvidia.gpu)
    timeout-minutes: 240
    env:
      JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test
      SHARD_NUMBER: 2
      NUM_TEST_SHARDS: 2
      TEST_CONFIG: default
      http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      PR_BODY: ${{ github.event.pull_request.body }}
    needs: build
    runs-on: windows.8xlarge.nvidia.gpu
    steps:
    - name: Display EC2 information
      shell: bash
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
      uses: seemethere/add-github-ssh-key@v1
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Checkout PyTorch
      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # deep clone, to allow use of git merge-base
        fetch-depth: 0
        submodules: recursive
    - name: Clean PyTorch checkout
      run: |
        # Remove any artifacts from the previous checkouts
        git clean -fxd
    - name: Install Visual Studio 2019 toolchain
      shell: powershell
      run: |
        .\.circleci\scripts\vs_install.ps1
    - name: Install Cuda
      shell: bash
      run: |
        .circleci/scripts/windows_cuda_install.sh
    - name: Install Cudnn
      shell: bash
      run: |
        .circleci/scripts/windows_cudnn_install.sh
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download PyTorch Build Artifacts
      with:
        name: ${{ env.BUILD_ENVIRONMENT }}
        path: C:\${{ github.run_id }}\build-results
    - name: Check build-results folder
      shell: powershell
      run: |
        tree /F C:\$Env:GITHUB_RUN_ID\build-results
    # Needed for coverage in win-test.sh
    - uses: actions/setup-python@v2
      name: Setup Python3
      with:
        python-version: '3.x'
    - name: Test
      shell: bash
      env:
        PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
      # Time out the test phase after 3.5 hours
      timeout-minutes: 210
      run: |
        .jenkins/pytorch/win-test.sh
    - name: Zip JSONs for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.8xlarge.nvidia.gpu'
      shell: powershell
      run: |
        # -ir => recursive include all files in pattern
        7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
    - uses: seemethere/upload-artifact-s3@v3
      name: Store Test Downloaded JSONs on S3
      if: always()
      with:
        retention-days: 14
        if-no-files-found: warn
        path:
          test-jsons-*.zip
    - name: Zip test reports for upload
      if: always()
      env:
        FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.8xlarge.nvidia.gpu'
      shell: powershell
      run: |
        # -ir => recursive include all files in pattern

688 .github/workflows/generated-periodic-win-vs2019-cuda11.5-py3.yml generated vendored

@@ -131,47 +131,19 @@ jobs:
      run: |
        rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
        rm -rf ./*

-  generate-test-matrix:
-    needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: 240
-    env:
-      TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
-      NUM_TEST_SHARDS: 2
-      NUM_TEST_SHARDS_ON_PULL_REQUEST: 2
-      NOGPU_RUNNER_TYPE: windows.4xlarge
-      ENABLE_FORCE_ON_CPU_TEST: 1
-      RUN_SMOKE_TESTS_ONLY_ON_PR: False
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-    container:
-      image: python:3.9
-    steps:
-    - name: Install dependencies
-      run: pip install typing-extensions==3.10
-    - name: Clone pytorch/pytorch
-      uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-    - name: Generating test matrix
-      id: set-matrix
-      run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
  test_force_on_cpu_1_1:
    name: test (force_on_cpu, 1, 1, windows.4xlarge)
    timeout-minutes: 240
    env:
      JOB_BASE_NAME: periodic-win-vs2019-cuda11.5-py3-test
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
-      TEST_CONFIG: ${{ matrix.config }}
      SHARD_NUMBER: 1
      NUM_TEST_SHARDS: 1
      TEST_CONFIG: force_on_cpu
      http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      PR_BODY: ${{ github.event.pull_request.body }}
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
    needs: build
    runs-on: windows.4xlarge
    steps:
    - name: Display EC2 information
      shell: bash
@@ -205,16 +177,6 @@ jobs:
      shell: powershell
      run: |
        .\.circleci\scripts\vs_install.ps1
-    - name: Install Cuda
-      if: ${{ matrix.config != 'force_on_cpu' }}
-      shell: bash
-      run: |
-        .circleci/scripts/windows_cuda_install.sh
-    - name: Install Cudnn
-      if: ${{ matrix.config != 'force_on_cpu' }}
-      shell: bash
-      run: |
-        .circleci/scripts/windows_cudnn_install.sh
    - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
      name: Download PyTorch Build Artifacts
      with:
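
Note how the same decision moves from runtime to generation time here: rather than every job evaluating an `if: ${{ matrix.config != 'force_on_cpu' }}` guard, the generator can simply skip emitting the CUDA steps for CPU-only jobs. A hedged sketch of that choice, using a hypothetical helper rather than the actual generator code:

    # Hypothetical generation-time helper: CPU-only jobs never get the CUDA
    # install steps emitted into their yaml, so no runtime guard is needed.
    from typing import List


    def windows_install_steps(config: str) -> List[dict]:
        steps = [{"name": "Install Visual Studio 2019 toolchain",
                  "shell": "powershell",
                  "run": r".\.circleci\scripts\vs_install.ps1"}]
        if config != "force_on_cpu":
            # GPU configs still need the CUDA toolkit and cuDNN on the runner
            steps.append({"name": "Install Cuda", "shell": "bash",
                          "run": ".circleci/scripts/windows_cuda_install.sh"})
            steps.append({"name": "Install Cudnn", "shell": "bash",
                          "run": ".circleci/scripts/windows_cudnn_install.sh"})
        return steps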
@@ -240,7 +202,7 @@ jobs:
    - name: Zip JSONs for upload
      if: always()
      env:
-        FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
        FILE_SUFFIX: '${{ github.job }}-force_on_cpu-1-1-windows.4xlarge'
      shell: powershell
      run: |
        # -ir => recursive include all files in pattern
@ -256,7 +218,639 @@ jobs:
|
|||
- name: Zip test reports for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
|
||||
FILE_SUFFIX: '${{ github.job }}-force_on_cpu-1-1-windows.4xlarge'
|
||||
shell: powershell
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Reports on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path:
|
||||
test-reports-*.zip
|
||||
- name: Install render_test_results dependencies
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install junitparser==2.1.1 rich==10.9.0
|
||||
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
|
||||
if: always()
|
||||
shell: bash
|
||||
# Encoding is weird on windows, just try to default to utf-8 if possible
|
||||
env:
|
||||
PYTHONIOENCODING: "utf-8"
|
||||
run: |
|
||||
python3 tools/render_junit.py test/
|
||||
- name: Wait until all sessions have drained
|
||||
shell: powershell
|
||||
if: always()
|
||||
timeout-minutes: 120
|
||||
run: |
|
||||
.github\scripts\wait_for_ssh_to_drain.ps1
|
||||
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
|
||||
shell: powershell
|
||||
if: always()
|
||||
run: |
|
||||
.github\scripts\kill_active_ssh_sessions.ps1
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Display and upload test statistics (Click Me)
|
||||
if: always()
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
JOB_BASE_NAME: periodic-win-vs2019-cuda11.5-py3-test
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
WORKFLOW_ID: '${{ github.run_id }}'
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -r requirements.txt
|
||||
python3 -m pip install boto3==1.19.12
|
||||
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
|
||||
- name: Cleanup workspace
|
||||
if: always()
|
||||
shell: bash
|
||||
# Should remove the entirety of pytorch-${{ github.run_id }}
|
||||
run: |
|
||||
rm -rf ./*
|
||||
test_distributed_1_1:
|
||||
name: test (distributed, 1, 1, windows.8xlarge.nvidia.gpu)
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
JOB_BASE_NAME: periodic-win-vs2019-cuda11.5-py3-test
|
||||
SHARD_NUMBER: 1
|
||||
NUM_TEST_SHARDS: 1
|
||||
TEST_CONFIG: distributed
|
||||
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
needs: build
|
||||
runs-on: windows.8xlarge.nvidia.gpu
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
- name: Install Visual Studio 2019 toolchain
|
||||
shell: powershell
|
||||
run: |
|
||||
.\.circleci\scripts\vs_install.ps1
|
||||
- name: Install Cuda
|
||||
shell: bash
|
||||
run: |
|
||||
.circleci/scripts/windows_cuda_install.sh
|
||||
- name: Install Cudnn
|
||||
shell: bash
|
||||
run: |
|
||||
.circleci/scripts/windows_cudnn_install.sh
|
||||
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
|
||||
name: Download PyTorch Build Artifacts
|
||||
with:
|
||||
name: ${{ env.BUILD_ENVIRONMENT }}
|
||||
path: C:\${{ github.run_id }}\build-results
|
||||
- name: Check build-results folder
|
||||
shell: powershell
|
||||
run: |
|
||||
tree /F C:\$Env:GITHUB_RUN_ID\build-results
|
||||
# Needed for coverage in win-test.sh
|
||||
- uses: actions/setup-python@v2
|
||||
name: Setup Python3
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Test
|
||||
shell: bash
|
||||
env:
|
||||
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
|
||||
# Time out the test phase after 3.5 hours
|
||||
timeout-minutes: 210
|
||||
run: |
|
||||
.jenkins/pytorch/win-test.sh
|
||||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-windows.8xlarge.nvidia.gpu'
|
||||
shell: powershell
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Downloaded JSONs on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: warn
|
||||
path:
|
||||
test-jsons-*.zip
|
||||
- name: Zip test reports for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-windows.8xlarge.nvidia.gpu'
|
||||
shell: powershell
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Reports on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path:
|
||||
test-reports-*.zip
|
||||
- name: Install render_test_results dependencies
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install junitparser==2.1.1 rich==10.9.0
|
||||
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
|
||||
if: always()
|
||||
shell: bash
|
||||
# Encoding is weird on windows, just try to default to utf-8 if possible
|
||||
env:
|
||||
PYTHONIOENCODING: "utf-8"
|
||||
run: |
|
||||
python3 tools/render_junit.py test/
|
||||
- name: Wait until all sessions have drained
|
||||
shell: powershell
|
||||
if: always()
|
||||
timeout-minutes: 120
|
||||
run: |
|
||||
.github\scripts\wait_for_ssh_to_drain.ps1
|
||||
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
|
||||
shell: powershell
|
||||
if: always()
|
||||
run: |
|
||||
.github\scripts\kill_active_ssh_sessions.ps1
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Display and upload test statistics (Click Me)
|
||||
if: always()
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
JOB_BASE_NAME: periodic-win-vs2019-cuda11.5-py3-test
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
WORKFLOW_ID: '${{ github.run_id }}'
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -r requirements.txt
|
||||
python3 -m pip install boto3==1.19.12
|
||||
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
|
||||
- name: Cleanup workspace
|
||||
if: always()
|
||||
shell: bash
|
||||
# Should remove the entirety of pytorch-${{ github.run_id }}
|
||||
run: |
|
||||
rm -rf ./*
|
||||
test_smoke_tests_1_1:
|
||||
name: test (smoke_tests, 1, 1, windows.8xlarge.nvidia.gpu)
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
JOB_BASE_NAME: periodic-win-vs2019-cuda11.5-py3-test
|
||||
SHARD_NUMBER: 1
|
||||
NUM_TEST_SHARDS: 1
|
||||
TEST_CONFIG: smoke_tests
|
||||
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
needs: build
|
||||
runs-on: windows.8xlarge.nvidia.gpu
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
- name: Install Visual Studio 2019 toolchain
|
||||
shell: powershell
|
||||
run: |
|
||||
.\.circleci\scripts\vs_install.ps1
|
||||
- name: Install Cuda
|
||||
shell: bash
|
||||
run: |
|
||||
.circleci/scripts/windows_cuda_install.sh
|
||||
- name: Install Cudnn
|
||||
shell: bash
|
||||
run: |
|
||||
.circleci/scripts/windows_cudnn_install.sh
|
||||
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
|
||||
name: Download PyTorch Build Artifacts
|
||||
with:
|
||||
name: ${{ env.BUILD_ENVIRONMENT }}
|
||||
path: C:\${{ github.run_id }}\build-results
|
||||
- name: Check build-results folder
|
||||
shell: powershell
|
||||
run: |
|
||||
tree /F C:\$Env:GITHUB_RUN_ID\build-results
|
||||
# Needed for coverage in win-test.sh
|
||||
- uses: actions/setup-python@v2
|
||||
name: Setup Python3
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Test
|
||||
shell: bash
|
||||
env:
|
||||
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
|
||||
# Time out the test phase after 3.5 hours
|
||||
timeout-minutes: 210
|
||||
run: |
|
||||
.jenkins/pytorch/win-test.sh
|
||||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-windows.8xlarge.nvidia.gpu'
|
||||
shell: powershell
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Downloaded JSONs on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: warn
|
||||
path:
|
||||
test-jsons-*.zip
|
||||
- name: Zip test reports for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-windows.8xlarge.nvidia.gpu'
|
||||
shell: powershell
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Reports on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path:
|
||||
test-reports-*.zip
|
||||
- name: Install render_test_results dependencies
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install junitparser==2.1.1 rich==10.9.0
|
||||
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
|
||||
if: always()
|
||||
shell: bash
|
||||
# Encoding is weird on windows, just try to default to utf-8 if possible
|
||||
env:
|
||||
PYTHONIOENCODING: "utf-8"
|
||||
run: |
|
||||
python3 tools/render_junit.py test/
|
||||
- name: Wait until all sessions have drained
|
||||
shell: powershell
|
||||
if: always()
|
||||
timeout-minutes: 120
|
||||
run: |
|
||||
.github\scripts\wait_for_ssh_to_drain.ps1
|
||||
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
|
||||
shell: powershell
|
||||
if: always()
|
||||
run: |
|
||||
.github\scripts\kill_active_ssh_sessions.ps1
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Display and upload test statistics (Click Me)
|
||||
if: always()
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
JOB_BASE_NAME: periodic-win-vs2019-cuda11.5-py3-test
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
WORKFLOW_ID: '${{ github.run_id }}'
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -r requirements.txt
|
||||
python3 -m pip install boto3==1.19.12
|
||||
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
|
||||
- name: Cleanup workspace
|
||||
if: always()
|
||||
shell: bash
|
||||
# Should remove the entirety of pytorch-${{ github.run_id }}
|
||||
run: |
|
||||
rm -rf ./*
|
||||
test_default_1_1:
|
||||
name: test (default, 1, 2, windows.8xlarge.nvidia.gpu)
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
JOB_BASE_NAME: periodic-win-vs2019-cuda11.5-py3-test
|
||||
SHARD_NUMBER: 1
|
||||
NUM_TEST_SHARDS: 2
|
||||
TEST_CONFIG: default
|
||||
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
needs: build
|
||||
runs-on: windows.8xlarge.nvidia.gpu
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
- name: Install Visual Studio 2019 toolchain
|
||||
shell: powershell
|
||||
run: |
|
||||
.\.circleci\scripts\vs_install.ps1
|
||||
- name: Install Cuda
|
||||
shell: bash
|
||||
run: |
|
||||
.circleci/scripts/windows_cuda_install.sh
|
||||
- name: Install Cudnn
|
||||
shell: bash
|
||||
run: |
|
||||
.circleci/scripts/windows_cudnn_install.sh
|
||||
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
|
||||
name: Download PyTorch Build Artifacts
|
||||
with:
|
||||
name: ${{ env.BUILD_ENVIRONMENT }}
|
||||
path: C:\${{ github.run_id }}\build-results
|
||||
- name: Check build-results folder
|
||||
shell: powershell
|
||||
run: |
|
||||
tree /F C:\$Env:GITHUB_RUN_ID\build-results
|
||||
# Needed for coverage in win-test.sh
|
||||
- uses: actions/setup-python@v2
|
||||
name: Setup Python3
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Test
|
||||
shell: bash
|
||||
env:
|
||||
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
|
||||
# Time out the test phase after 3.5 hours
|
||||
timeout-minutes: 210
|
||||
run: |
|
||||
.jenkins/pytorch/win-test.sh
|
||||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-default-1-2-windows.8xlarge.nvidia.gpu'
|
||||
shell: powershell
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
|
||||
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-2-windows.8xlarge.nvidia.gpu'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: periodic-win-vs2019-cuda11.5-py3-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Cleanup workspace
if: always()
shell: bash
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf ./*
test_default_2_1:
name: test (default, 2, 2, windows.8xlarge.nvidia.gpu)
timeout-minutes: 240
env:
JOB_BASE_NAME: periodic-win-vs2019-cuda11.5-py3-test
SHARD_NUMBER: 2
NUM_TEST_SHARDS: 2
TEST_CONFIG: default
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
PR_BODY: ${{ github.event.pull_request.body }}
needs: build
runs-on: windows.8xlarge.nvidia.gpu
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- name: Install Cuda
shell: bash
run: |
.circleci/scripts/windows_cuda_install.sh
- name: Install Cudnn
shell: bash
run: |
.circleci/scripts/windows_cudnn_install.sh
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Check build-results folder
shell: powershell
run: |
tree /F C:\$Env:GITHUB_RUN_ID\build-results
# Needed for coverage in win-test.sh
- uses: actions/setup-python@v2
name: Setup Python3
with:
python-version: '3.x'
- name: Test
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
# Time out the test phase after 3.5 hours
timeout-minutes: 210
run: |
.jenkins/pytorch/win-test.sh
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.8xlarge.nvidia.gpu'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.8xlarge.nvidia.gpu'
shell: powershell
run: |
# -ir => recursive include all files in pattern
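A note on the naming convention visible throughout the generated jobs above: with the runtime matrix removed, the (config, shard, total shards, runner) tuple is baked in at generation time, in the job id (test_default_2_1), in the display name (test (default, 2, 2, windows.8xlarge.nvidia.gpu)), and in the artifact suffix. Both forms of the suffix appear verbatim in this commit's diffs; the static line replaces the dynamic one:

# Rendered into the workflow by the generation script:
FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.8xlarge.nvidia.gpu'
# Previously expanded at CI time from the generated matrix:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'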
.github/workflows/generated-pytorch-xla-linux-bionic-py3.7-clang8.yml (generated, vendored; 306 lines changed)
@@ -215,53 +215,17 @@ jobs:
# Prune all of the docker images
docker system prune -af

generate-test-matrix:
test_xla_1_1:
name: test (xla, 1, 1, linux.2xlarge)
needs: build
runs-on: ubuntu-18.04
timeout-minutes: 240
env:
TEST_RUNNER_TYPE: linux.2xlarge
ENABLE_DISTRIBUTED_TEST: ''
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: 1
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py

test:
needs: [build, generate-test-matrix]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: pytorch-xla-linux-bionic-py3.7-clang8-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: xla
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information
@@ -324,11 +288,6 @@ jobs:
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
@@ -442,7 +401,7 @@ jobs:
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-xla-1-1-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
@@ -458,7 +417,256 @@ jobs:
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-xla-1-1-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: pytorch-xla-linux-bionic-py3.7-clang8-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
test_smoke_tests_1_1:
name: test (smoke_tests, 1, 1, linux.2xlarge)
needs: build
runs-on: linux.2xlarge
timeout-minutes: 240
env:
DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
JOB_BASE_NAME: pytorch-xla-linux-bionic-py3.7-clang8-test
TEST_CONFIG: smoke_tests
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
# Time out the test phase after 240 minutes
timeout-minutes: 240
run: |
set -x

if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.jenkins/caffe2/test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
PROXY_ENV=
# NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
# We should investigate whether or not there's a list of hostnames we can add to no_proxy to
# make it so that we shouldn't have to fully disable squid for XLA tests
if [[ $TEST_CONFIG != 'xla' ]]; then
# shellcheck disable=SC2089
PROXY_ENV="-e http_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e https_proxy=http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128 -e no_proxy=localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock"
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PR_BODY \
-e PYTORCH_RETRY_TEST_CASES \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CUDA \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
${PROXY_ENV} \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--ipc=host \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-linux.2xlarge'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
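The hunks above show the shape of the change that repeats in every workflow this commit touches: the generate-test-matrix job and its fromJson consumer are deleted, and each (config, shard) pair becomes its own statically generated job. A condensed before/after sketch, abbreviated from the full diff above (not a literal excerpt):

# Before: matrix resolved at CI time by a helper job
test:
  needs: [build, generate-test-matrix]
  strategy:
    matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
    fail-fast: false
  runs-on: ${{ matrix.runner }}

# After: one concrete job per (config, shard), emitted by generate_ci_workflows.py
test_xla_1_1:
  name: test (xla, 1, 1, linux.2xlarge)
  needs: build
  runs-on: linux.2xlarge
  env:
    TEST_CONFIG: xla
    SHARD_NUMBER: 1
    NUM_TEST_SHARDS: 1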
.github/workflows/generated-win-vs2019-cpu-py3.yml (generated, vendored; 496 lines changed)
@@ -124,47 +124,19 @@ jobs:
run: |
rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
rm -rf ./*

generate-test-matrix:
needs: build
runs-on: ubuntu-18.04
timeout-minutes: 240
env:
TEST_RUNNER_TYPE: windows.4xlarge
NUM_TEST_SHARDS: 2
NUM_TEST_SHARDS_ON_PULL_REQUEST: 2
NOGPU_RUNNER_TYPE: windows.4xlarge
ENABLE_FORCE_ON_CPU_TEST: ''
RUN_SMOKE_TESTS_ONLY_ON_PR: False
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py

test:
test_distributed_1_1:
name: test (distributed, 1, 1, windows.4xlarge)
timeout-minutes: 240
env:
JOB_BASE_NAME: win-vs2019-cpu-py3-test
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
TEST_CONFIG: distributed
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
PR_BODY: ${{ github.event.pull_request.body }}
needs: [build, generate-test-matrix]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
needs: build
runs-on: windows.4xlarge
steps:
- name: Display EC2 information
shell: bash
@@ -223,7 +195,7 @@ jobs:
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-windows.4xlarge'
shell: powershell
run: |
# -ir => recursive include all files in pattern
@@ -239,7 +211,457 @@ jobs:
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-windows.4xlarge'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: win-vs2019-cpu-py3-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Cleanup workspace
if: always()
shell: bash
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf ./*
test_smoke_tests_1_1:
name: test (smoke_tests, 1, 1, windows.4xlarge)
timeout-minutes: 240
env:
JOB_BASE_NAME: win-vs2019-cpu-py3-test
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 1
TEST_CONFIG: smoke_tests
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
PR_BODY: ${{ github.event.pull_request.body }}
needs: build
runs-on: windows.4xlarge
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Check build-results folder
shell: powershell
run: |
tree /F C:\$Env:GITHUB_RUN_ID\build-results
# Needed for coverage in win-test.sh
- uses: actions/setup-python@v2
name: Setup Python3
with:
python-version: '3.x'
- name: Test
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
# Time out the test phase after 3.5 hours
timeout-minutes: 210
run: |
.jenkins/pytorch/win-test.sh
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-windows.4xlarge'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-windows.4xlarge'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: win-vs2019-cpu-py3-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Cleanup workspace
if: always()
shell: bash
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf ./*
test_default_1_1:
name: test (default, 1, 2, windows.4xlarge)
timeout-minutes: 240
env:
JOB_BASE_NAME: win-vs2019-cpu-py3-test
SHARD_NUMBER: 1
NUM_TEST_SHARDS: 2
TEST_CONFIG: default
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
PR_BODY: ${{ github.event.pull_request.body }}
needs: build
runs-on: windows.4xlarge
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Check build-results folder
shell: powershell
run: |
tree /F C:\$Env:GITHUB_RUN_ID\build-results
# Needed for coverage in win-test.sh
- uses: actions/setup-python@v2
name: Setup Python3
with:
python-version: '3.x'
- name: Test
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
# Time out the test phase after 3.5 hours
timeout-minutes: 210
run: |
.jenkins/pytorch/win-test.sh
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-2-windows.4xlarge'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-1-2-windows.4xlarge'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on windows, just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: win-vs2019-cpu-py3-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Cleanup workspace
if: always()
shell: bash
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf ./*
test_default_2_1:
name: test (default, 2, 2, windows.4xlarge)
timeout-minutes: 240
env:
JOB_BASE_NAME: win-vs2019-cpu-py3-test
SHARD_NUMBER: 2
NUM_TEST_SHARDS: 2
TEST_CONFIG: default
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
PR_BODY: ${{ github.event.pull_request.body }}
needs: build
runs-on: windows.4xlarge
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Check build-results folder
shell: powershell
run: |
tree /F C:\$Env:GITHUB_RUN_ID\build-results
# Needed for coverage in win-test.sh
- uses: actions/setup-python@v2
name: Setup Python3
with:
python-version: '3.x'
- name: Test
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
# Time out the test phase after 3.5 hours
timeout-minutes: 210
run: |
.jenkins/pytorch/win-test.sh
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.4xlarge'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.4xlarge'
shell: powershell
run: |
# -ir => recursive include all files in pattern
.github/workflows/generated-win-vs2019-cuda11.3-py3-smoke.yml (generated, vendored, new file; 598 lines)
@ -0,0 +1,598 @@
|
|||
# @generated DO NOT EDIT MANUALLY
|
||||
# Template is at: .github/templates/windows_ci_workflow.yml.j2
|
||||
# Generation script: .github/scripts/generate_ci_workflows.py
|
||||
name: win-vs2019-cuda11.3-py3-smoke
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
tags:
|
||||
- 'ciflow/all/*'
|
||||
- 'ciflow/cuda/*'
|
||||
- 'ciflow/trunk/*'
|
||||
- 'ciflow/win/*'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
BUILD_ENVIRONMENT: win-vs2019-cuda11.3-py3-smoke
|
||||
BUILD_WHEEL: 1
|
||||
MAX_JOBS: 8
|
||||
CUDA_VERSION: "11.3"
|
||||
IN_CI: 1
|
||||
IS_GHA: 1
|
||||
INSTALL_WINDOWS_SDK: 1
|
||||
PYTHON_VERSION: "3.8"
|
||||
PYTORCH_RETRY_TEST_CASES: 1
|
||||
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
|
||||
SCCACHE_BUCKET: "ossci-compiler-cache"
|
||||
VC_PRODUCT: "BuildTools"
|
||||
VC_VERSION: ""
|
||||
VS_VERSION: "16.8.6"
|
||||
VC_YEAR: "2019"
|
||||
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
|
||||
no_proxy: localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
TORCH_CUDA_ARCH_LIST: "7.0"
|
||||
USE_CUDA: 1
|
||||
|
||||
concurrency:
|
||||
group: win-vs2019-cuda11.3-py3-smoke-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: "windows.4xlarge"
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
JOB_BASE_NAME: win-vs2019-cuda11.3-py3-smoke-build
|
||||
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
steps:
|
||||
- name: print labels
|
||||
run: echo "${PR_LABELS}"
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: Install Visual Studio 2019 toolchain
|
||||
shell: powershell
|
||||
run: |
|
||||
.\.circleci\scripts\vs_install.ps1
|
||||
- name: Install Cuda
|
||||
shell: bash
|
||||
run: |
|
||||
.circleci/scripts/windows_cuda_install.sh
|
||||
- name: Install Cudnn
|
||||
shell: bash
|
||||
run: |
|
||||
.circleci/scripts/windows_cudnn_install.sh
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Build
|
||||
shell: bash
|
||||
env:
|
||||
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
run: |
|
||||
.jenkins/pytorch/win-build.sh
|
||||
# Upload to github so that people can click and download artifacts
|
||||
- name: Upload artifacts to s3
|
||||
uses: seemethere/upload-artifact-s3@v3
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
name: ${{ env.BUILD_ENVIRONMENT }}
|
||||
path: C:\${{ github.run_id }}\build-results
|
||||
- name: Wait until all sessions have drained
|
||||
shell: powershell
|
||||
if: always()
|
||||
timeout-minutes: 120
|
||||
run: |
|
||||
.github\scripts\wait_for_ssh_to_drain.ps1
|
||||
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
|
||||
shell: powershell
|
||||
if: always()
|
||||
run: |
|
||||
.github\scripts\kill_active_ssh_sessions.ps1
|
||||
- name: Cleanup build-results and workspaces
|
||||
if: always()
|
||||
shell: bash
|
||||
env:
|
||||
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
|
||||
# Should remove the entirety of pytorch-${{ github.run_id }}
|
||||
run: |
|
||||
rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
|
||||
rm -rf ./*
|
||||
test_force_on_cpu_1_1:
|
||||
name: test (force_on_cpu, 1, 1, windows.4xlarge)
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
JOB_BASE_NAME: win-vs2019-cuda11.3-py3-smoke-test
|
||||
SHARD_NUMBER: 1
|
||||
NUM_TEST_SHARDS: 1
|
||||
TEST_CONFIG: force_on_cpu
|
||||
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
needs: build
|
||||
runs-on: windows.4xlarge
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
- name: Install Visual Studio 2019 toolchain
|
||||
shell: powershell
|
||||
run: |
|
||||
.\.circleci\scripts\vs_install.ps1
|
||||
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
|
||||
name: Download PyTorch Build Artifacts
|
||||
with:
|
||||
name: ${{ env.BUILD_ENVIRONMENT }}
|
||||
path: C:\${{ github.run_id }}\build-results
|
||||
- name: Check build-results folder
|
||||
shell: powershell
|
||||
run: |
|
||||
tree /F C:\$Env:GITHUB_RUN_ID\build-results
|
||||
# Needed for coverage in win-test.sh
|
||||
- uses: actions/setup-python@v2
|
||||
name: Setup Python3
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Test
|
||||
shell: bash
|
||||
env:
|
||||
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
|
||||
# Time out the test phase after 3.5 hours
|
||||
timeout-minutes: 210
|
||||
run: |
|
||||
.jenkins/pytorch/win-test.sh
|
||||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-force_on_cpu-1-1-windows.4xlarge'
|
||||
shell: powershell
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Downloaded JSONs on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: warn
|
||||
path:
|
||||
test-jsons-*.zip
|
||||
- name: Zip test reports for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-force_on_cpu-1-1-windows.4xlarge'
|
||||
shell: powershell
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Reports on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path:
|
||||
test-reports-*.zip
|
||||
- name: Install render_test_results dependencies
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install junitparser==2.1.1 rich==10.9.0
|
||||
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
|
||||
if: always()
|
||||
shell: bash
|
||||
# Encoding is weird on windows, just try to default to utf-8 if possible
|
||||
env:
|
||||
PYTHONIOENCODING: "utf-8"
|
||||
run: |
|
||||
python3 tools/render_junit.py test/
|
||||
- name: Wait until all sessions have drained
|
||||
shell: powershell
|
||||
if: always()
|
||||
timeout-minutes: 120
|
||||
run: |
|
||||
.github\scripts\wait_for_ssh_to_drain.ps1
|
||||
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
|
||||
shell: powershell
|
||||
if: always()
|
||||
run: |
|
||||
.github\scripts\kill_active_ssh_sessions.ps1
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Display and upload test statistics (Click Me)
|
||||
if: always()
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
JOB_BASE_NAME: win-vs2019-cuda11.3-py3-smoke-test
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
WORKFLOW_ID: '${{ github.run_id }}'
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -r requirements.txt
|
||||
python3 -m pip install boto3==1.19.12
|
||||
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
|
||||
- name: Cleanup workspace
|
||||
if: always()
|
||||
shell: bash
|
||||
# Should remove the entirety of pytorch-${{ github.run_id }}
|
||||
run: |
|
||||
rm -rf ./*
|
||||
test_distributed_1_1:
|
||||
name: test (distributed, 1, 1, windows.8xlarge.nvidia.gpu)
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
JOB_BASE_NAME: win-vs2019-cuda11.3-py3-smoke-test
|
||||
SHARD_NUMBER: 1
|
||||
NUM_TEST_SHARDS: 1
|
||||
TEST_CONFIG: distributed
|
||||
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
needs: build
|
||||
runs-on: windows.8xlarge.nvidia.gpu
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
- name: Install Visual Studio 2019 toolchain
|
||||
shell: powershell
|
||||
run: |
|
||||
.\.circleci\scripts\vs_install.ps1
|
||||
- name: Install Cuda
|
||||
shell: bash
|
||||
run: |
|
||||
.circleci/scripts/windows_cuda_install.sh
|
||||
- name: Install Cudnn
|
||||
shell: bash
|
||||
run: |
|
||||
.circleci/scripts/windows_cudnn_install.sh
|
||||
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
|
||||
name: Download PyTorch Build Artifacts
|
||||
with:
|
||||
name: ${{ env.BUILD_ENVIRONMENT }}
|
||||
path: C:\${{ github.run_id }}\build-results
|
||||
- name: Check build-results folder
|
||||
shell: powershell
|
||||
run: |
|
||||
tree /F C:\$Env:GITHUB_RUN_ID\build-results
|
||||
# Needed for coverage in win-test.sh
|
||||
- uses: actions/setup-python@v2
|
||||
name: Setup Python3
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Test
|
||||
shell: bash
|
||||
env:
|
||||
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
|
||||
# Time out the test phase after 3.5 hours
|
||||
timeout-minutes: 210
|
||||
run: |
|
||||
.jenkins/pytorch/win-test.sh
|
||||
- name: Zip JSONs for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-windows.8xlarge.nvidia.gpu'
|
||||
shell: powershell
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Downloaded JSONs on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: warn
|
||||
path:
|
||||
test-jsons-*.zip
|
||||
- name: Zip test reports for upload
|
||||
if: always()
|
||||
env:
|
||||
FILE_SUFFIX: '${{ github.job }}-distributed-1-1-windows.8xlarge.nvidia.gpu'
|
||||
shell: powershell
|
||||
run: |
|
||||
# -ir => recursive include all files in pattern
|
||||
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
|
||||
- uses: seemethere/upload-artifact-s3@v3
|
||||
name: Store Test Reports on S3
|
||||
if: always()
|
||||
with:
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path:
|
||||
test-reports-*.zip
|
||||
- name: Install render_test_results dependencies
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install junitparser==2.1.1 rich==10.9.0
|
||||
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
|
||||
if: always()
|
||||
shell: bash
|
||||
# Encoding is weird on windows, just try to default to utf-8 if possible
|
||||
env:
|
||||
PYTHONIOENCODING: "utf-8"
|
||||
run: |
|
||||
python3 tools/render_junit.py test/
|
||||
- name: Wait until all sessions have drained
|
||||
shell: powershell
|
||||
if: always()
|
||||
timeout-minutes: 120
|
||||
run: |
|
||||
.github\scripts\wait_for_ssh_to_drain.ps1
|
||||
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
|
||||
shell: powershell
|
||||
if: always()
|
||||
run: |
|
||||
.github\scripts\kill_active_ssh_sessions.ps1
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Display and upload test statistics (Click Me)
|
||||
if: always()
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
JOB_BASE_NAME: win-vs2019-cuda11.3-py3-smoke-test
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
WORKFLOW_ID: '${{ github.run_id }}'
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -r requirements.txt
|
||||
python3 -m pip install boto3==1.19.12
|
||||
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
|
||||
- name: Cleanup workspace
|
||||
if: always()
|
||||
shell: bash
|
||||
# Should remove the entirety of pytorch-${{ github.run_id }}
|
||||
run: |
|
||||
rm -rf ./*
|
||||
test_smoke_tests_1_1:
|
||||
name: test (smoke_tests, 1, 1, windows.8xlarge.nvidia.gpu)
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
JOB_BASE_NAME: win-vs2019-cuda11.3-py3-smoke-test
|
||||
SHARD_NUMBER: 1
|
||||
NUM_TEST_SHARDS: 1
|
||||
TEST_CONFIG: smoke_tests
|
||||
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
needs: build
|
    runs-on: windows.8xlarge.nvidia.gpu
    steps:
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          # deep clone, to allow use of git merge-base
          fetch-depth: 0
          submodules: recursive
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
      - name: Install Visual Studio 2019 toolchain
        shell: powershell
        run: |
          .\.circleci\scripts\vs_install.ps1
      - name: Install Cuda
        shell: bash
        run: |
          .circleci/scripts/windows_cuda_install.sh
      - name: Install Cudnn
        shell: bash
        run: |
          .circleci/scripts/windows_cudnn_install.sh
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
          path: C:\${{ github.run_id }}\build-results
      - name: Check build-results folder
        shell: powershell
        run: |
          tree /F C:\$Env:GITHUB_RUN_ID\build-results
      # Needed for coverage in win-test.sh
      - uses: actions/setup-python@v2
        name: Setup Python3
        with:
          python-version: '3.x'
      - name: Test
        shell: bash
        env:
          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
        # Time out the test phase after 3.5 hours
        timeout-minutes: 210
        run: |
          .jenkins/pytorch/win-test.sh
      - name: Zip JSONs for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-windows.8xlarge.nvidia.gpu'
        shell: powershell
        run: |
          # -ir => recursive include all files in pattern
          7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Downloaded JSONs on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: warn
          path:
            test-jsons-*.zip
      - name: Zip test reports for upload
        if: always()
        env:
          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-windows.8xlarge.nvidia.gpu'
        shell: powershell
        run: |
          # -ir => recursive include all files in pattern
          7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
      - uses: seemethere/upload-artifact-s3@v3
        name: Store Test Reports on S3
        if: always()
        with:
          retention-days: 14
          if-no-files-found: error
          path:
            test-reports-*.zip
      - name: Install render_test_results dependencies
        if: always()
        shell: bash
        run: |
          python3 -m pip install junitparser==2.1.1 rich==10.9.0
      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
        if: always()
        shell: bash
        # Encoding is weird on windows, just try to default to utf-8 if possible
        env:
          PYTHONIOENCODING: "utf-8"
        run: |
          python3 tools/render_junit.py test/
      - name: Wait until all sessions have drained
        shell: powershell
        if: always()
        timeout-minutes: 120
        run: |
          .github\scripts\wait_for_ssh_to_drain.ps1
      - name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
        shell: powershell
        if: always()
        run: |
          .github\scripts\kill_active_ssh_sessions.ps1
      - name: Parse ref
        id: parse-ref
        run: .github/scripts/parse_ref.py
      - name: Display and upload test statistics (Click Me)
        if: always()
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          AWS_DEFAULT_REGION: us-east-1
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          JOB_BASE_NAME: win-vs2019-cuda11.3-py3-smoke-test
          PR_NUMBER: ${{ github.event.pull_request.number }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          TAG: ${{ steps.parse-ref.outputs.tag }}
          WORKFLOW_ID: '${{ github.run_id }}'
        shell: bash
        run: |
          python3 -m pip install -r requirements.txt
          python3 -m pip install boto3==1.19.12
          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
      - name: Cleanup workspace
        if: always()
        shell: bash
        # Should remove the entirety of pytorch-${{ github.run_id }}
        run: |
          rm -rf ./*
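The smoke workflow above is fully expanded at generation time; nothing about its test jobs is computed while CI runs. As a minimal sketch of the idea, assuming hypothetical TestConfig/expand_test_jobs helpers and illustrative job-id naming (the real logic lives in .github/scripts/generate_ci_workflows.py and its jinja templates):

# Minimal sketch of static test-job expansion. TestConfig and
# expand_test_jobs are hypothetical illustrations, not the actual
# generator code.
from dataclasses import dataclass
from typing import Dict, List

@dataclass
class TestConfig:
    config: str      # e.g. "smoke_tests" or "force_on_cpu"
    num_shards: int  # total shards for this config
    runner: str      # e.g. "windows.8xlarge.nvidia.gpu"

def expand_test_jobs(configs: List[TestConfig]) -> Dict[str, dict]:
    """Expand every (config, shard) pair into a concrete job definition,
    doing at generation time what generate-test-matrix did at CI time."""
    jobs = {}
    for cfg in configs:
        for shard in range(1, cfg.num_shards + 1):
            job_id = f"test_{cfg.config}_{shard}_{cfg.num_shards}"  # naming is illustrative
            jobs[job_id] = {
                "name": f"test ({cfg.config}, {shard}, {cfg.num_shards}, {cfg.runner})",
                "env": {
                    "SHARD_NUMBER": shard,
                    "NUM_TEST_SHARDS": cfg.num_shards,
                    "TEST_CONFIG": cfg.config,
                },
                "needs": "build",
                "runs-on": cfg.runner,
            }
    return jobs

if __name__ == "__main__":
    # The smoke workflow is the expansion of a single one-shard config:
    for job_id in expand_test_jobs(
        [TestConfig("smoke_tests", 1, "windows.8xlarge.nvidia.gpu")]
    ):
        print(job_id)

Because the expansion happens before the YAML is checked in, the generated file itself shows exactly which test jobs will run.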
.github/workflows/generated-win-vs2019-cuda11.3-py3.yml (generated, vendored, 689 lines changed)
@@ -4,7 +4,6 @@
 name: win-vs2019-cuda11.3-py3
 
 on:
-  pull_request:
   push:
     tags:
      - 'ciflow/all/*'
@@ -133,47 +132,19 @@ jobs:
         run: |
           rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
           rm -rf ./*
 
-  generate-test-matrix:
-    needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: 240
-    env:
-      TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
-      NUM_TEST_SHARDS: 2
-      NUM_TEST_SHARDS_ON_PULL_REQUEST: 0
-      NOGPU_RUNNER_TYPE: windows.4xlarge
-      ENABLE_FORCE_ON_CPU_TEST: 1
-      RUN_SMOKE_TESTS_ONLY_ON_PR: True
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-    container:
-      image: python:3.9
-    steps:
-      - name: Install dependencies
-        run: pip install typing-extensions==3.10
-      - name: Clone pytorch/pytorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-      - name: Generating test matrix
-        id: set-matrix
-        run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
+  test_force_on_cpu_1_1:
+    name: test (force_on_cpu, 1, 1, windows.4xlarge)
     timeout-minutes: 240
     env:
       JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
-      TEST_CONFIG: ${{ matrix.config }}
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 1
+      TEST_CONFIG: force_on_cpu
       http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
       https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
       PR_BODY: ${{ github.event.pull_request.body }}
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
+    needs: build
+    runs-on: windows.4xlarge
     steps:
       - name: Display EC2 information
         shell: bash
@@ -207,16 +178,6 @@ jobs:
         shell: powershell
         run: |
           .\.circleci\scripts\vs_install.ps1
-      - name: Install Cuda
-        if: ${{ matrix.config != 'force_on_cpu' }}
-        shell: bash
-        run: |
-          .circleci/scripts/windows_cuda_install.sh
-      - name: Install Cudnn
-        if: ${{ matrix.config != 'force_on_cpu' }}
-        shell: bash
-        run: |
-          .circleci/scripts/windows_cudnn_install.sh
       - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
         name: Download PyTorch Build Artifacts
         with:
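Note in the hunk above that the per-step guards like `if: ${{ matrix.config != 'force_on_cpu' }}` disappear along with the matrix: once each config is its own statically generated job, skipping CUDA setup on the CPU-only job becomes a generation-time decision rather than a runtime one. A minimal sketch, with a hypothetical cuda_install_steps helper (not actual generator code):

# Sketch only: with one statically generated job per config, a runtime
# step guard turns into a plain generation-time conditional.
from typing import List

def cuda_install_steps(config: str) -> List[dict]:
    if config == "force_on_cpu":
        return []  # CPU-only job: emit no CUDA/cuDNN install steps at all
    return [
        {"name": "Install Cuda",
         "shell": "bash",
         "run": ".circleci/scripts/windows_cuda_install.sh"},
        {"name": "Install Cudnn",
         "shell": "bash",
         "run": ".circleci/scripts/windows_cudnn_install.sh"},
    ]

print(cuda_install_steps("force_on_cpu"))  # []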
@@ -242,7 +203,7 @@ jobs:
       - name: Zip JSONs for upload
         if: always()
         env:
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+          FILE_SUFFIX: '${{ github.job }}-force_on_cpu-1-1-windows.4xlarge'
         shell: powershell
         run: |
           # -ir => recursive include all files in pattern
@@ -258,7 +219,639 @@ jobs:
       - name: Zip test reports for upload
         if: always()
         env:
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+          FILE_SUFFIX: '${{ github.job }}-force_on_cpu-1-1-windows.4xlarge'
         shell: powershell
         run: |
           # -ir => recursive include all files in pattern
+          7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Wait until all sessions have drained
+        shell: powershell
+        if: always()
+        timeout-minutes: 120
+        run: |
+          .github\scripts\wait_for_ssh_to_drain.ps1
+      - name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
+        shell: powershell
+        if: always()
+        run: |
+          .github\scripts\kill_active_ssh_sessions.ps1
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Cleanup workspace
+        if: always()
+        shell: bash
+        # Should remove the entirety of pytorch-${{ github.run_id }}
+        run: |
+          rm -rf ./*
+  test_distributed_1_1:
+    name: test (distributed, 1, 1, windows.8xlarge.nvidia.gpu)
+    timeout-minutes: 240
+    env:
+      JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 1
+      TEST_CONFIG: distributed
+      http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
+      https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
+      PR_BODY: ${{ github.event.pull_request.body }}
+    needs: build
+    runs-on: windows.8xlarge.nvidia.gpu
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Install Visual Studio 2019 toolchain
+        shell: powershell
+        run: |
+          .\.circleci\scripts\vs_install.ps1
+      - name: Install Cuda
+        shell: bash
+        run: |
+          .circleci/scripts/windows_cuda_install.sh
+      - name: Install Cudnn
+        shell: bash
+        run: |
+          .circleci/scripts/windows_cudnn_install.sh
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+          path: C:\${{ github.run_id }}\build-results
+      - name: Check build-results folder
+        shell: powershell
+        run: |
+          tree /F C:\$Env:GITHUB_RUN_ID\build-results
+      # Needed for coverage in win-test.sh
+      - uses: actions/setup-python@v2
+        name: Setup Python3
+        with:
+          python-version: '3.x'
+      - name: Test
+        shell: bash
+        env:
+          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
+        # Time out the test phase after 3.5 hours
+        timeout-minutes: 210
+        run: |
+          .jenkins/pytorch/win-test.sh
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-distributed-1-1-windows.8xlarge.nvidia.gpu'
+        shell: powershell
+        run: |
+          # -ir => recursive include all files in pattern
+          7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-distributed-1-1-windows.8xlarge.nvidia.gpu'
+        shell: powershell
+        run: |
+          # -ir => recursive include all files in pattern
+          7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Wait until all sessions have drained
+        shell: powershell
+        if: always()
+        timeout-minutes: 120
+        run: |
+          .github\scripts\wait_for_ssh_to_drain.ps1
+      - name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
+        shell: powershell
+        if: always()
+        run: |
+          .github\scripts\kill_active_ssh_sessions.ps1
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Cleanup workspace
+        if: always()
+        shell: bash
+        # Should remove the entirety of pytorch-${{ github.run_id }}
+        run: |
+          rm -rf ./*
+  test_smoke_tests_1_1:
+    name: test (smoke_tests, 1, 1, windows.8xlarge.nvidia.gpu)
+    timeout-minutes: 240
+    env:
+      JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 1
+      TEST_CONFIG: smoke_tests
+      http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
+      https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
+      PR_BODY: ${{ github.event.pull_request.body }}
+    needs: build
+    runs-on: windows.8xlarge.nvidia.gpu
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Install Visual Studio 2019 toolchain
+        shell: powershell
+        run: |
+          .\.circleci\scripts\vs_install.ps1
+      - name: Install Cuda
+        shell: bash
+        run: |
+          .circleci/scripts/windows_cuda_install.sh
+      - name: Install Cudnn
+        shell: bash
+        run: |
+          .circleci/scripts/windows_cudnn_install.sh
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+          path: C:\${{ github.run_id }}\build-results
+      - name: Check build-results folder
+        shell: powershell
+        run: |
+          tree /F C:\$Env:GITHUB_RUN_ID\build-results
+      # Needed for coverage in win-test.sh
+      - uses: actions/setup-python@v2
+        name: Setup Python3
+        with:
+          python-version: '3.x'
+      - name: Test
+        shell: bash
+        env:
+          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
+        # Time out the test phase after 3.5 hours
+        timeout-minutes: 210
+        run: |
+          .jenkins/pytorch/win-test.sh
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-windows.8xlarge.nvidia.gpu'
+        shell: powershell
+        run: |
+          # -ir => recursive include all files in pattern
+          7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-smoke_tests-1-1-windows.8xlarge.nvidia.gpu'
+        shell: powershell
+        run: |
+          # -ir => recursive include all files in pattern
+          7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Wait until all sessions have drained
+        shell: powershell
+        if: always()
+        timeout-minutes: 120
+        run: |
+          .github\scripts\wait_for_ssh_to_drain.ps1
+      - name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
+        shell: powershell
+        if: always()
+        run: |
+          .github\scripts\kill_active_ssh_sessions.ps1
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Cleanup workspace
+        if: always()
+        shell: bash
+        # Should remove the entirety of pytorch-${{ github.run_id }}
+        run: |
+          rm -rf ./*
+  test_default_1_1:
+    name: test (default, 1, 2, windows.8xlarge.nvidia.gpu)
+    timeout-minutes: 240
+    env:
+      JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
+      SHARD_NUMBER: 1
+      NUM_TEST_SHARDS: 2
+      TEST_CONFIG: default
+      http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
+      https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
+      PR_BODY: ${{ github.event.pull_request.body }}
+    needs: build
+    runs-on: windows.8xlarge.nvidia.gpu
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Install Visual Studio 2019 toolchain
+        shell: powershell
+        run: |
+          .\.circleci\scripts\vs_install.ps1
+      - name: Install Cuda
+        shell: bash
+        run: |
+          .circleci/scripts/windows_cuda_install.sh
+      - name: Install Cudnn
+        shell: bash
+        run: |
+          .circleci/scripts/windows_cudnn_install.sh
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+          path: C:\${{ github.run_id }}\build-results
+      - name: Check build-results folder
+        shell: powershell
+        run: |
+          tree /F C:\$Env:GITHUB_RUN_ID\build-results
+      # Needed for coverage in win-test.sh
+      - uses: actions/setup-python@v2
+        name: Setup Python3
+        with:
+          python-version: '3.x'
+      - name: Test
+        shell: bash
+        env:
+          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
+        # Time out the test phase after 3.5 hours
+        timeout-minutes: 210
+        run: |
+          .jenkins/pytorch/win-test.sh
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-1-2-windows.8xlarge.nvidia.gpu'
+        shell: powershell
+        run: |
+          # -ir => recursive include all files in pattern
+          7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-1-2-windows.8xlarge.nvidia.gpu'
+        shell: powershell
+        run: |
+          # -ir => recursive include all files in pattern
+          7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Wait until all sessions have drained
+        shell: powershell
+        if: always()
+        timeout-minutes: 120
+        run: |
+          .github\scripts\wait_for_ssh_to_drain.ps1
+      - name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
+        shell: powershell
+        if: always()
+        run: |
+          .github\scripts\kill_active_ssh_sessions.ps1
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          TAG: ${{ steps.parse-ref.outputs.tag }}
+          WORKFLOW_ID: '${{ github.run_id }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.19.12
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Cleanup workspace
+        if: always()
+        shell: bash
+        # Should remove the entirety of pytorch-${{ github.run_id }}
+        run: |
+          rm -rf ./*
+  test_default_2_1:
+    name: test (default, 2, 2, windows.8xlarge.nvidia.gpu)
+    timeout-minutes: 240
+    env:
+      JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
+      SHARD_NUMBER: 2
+      NUM_TEST_SHARDS: 2
+      TEST_CONFIG: default
+      http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
+      https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
+      PR_BODY: ${{ github.event.pull_request.body }}
+    needs: build
+    runs-on: windows.8xlarge.nvidia.gpu
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+      - name: Install Visual Studio 2019 toolchain
+        shell: powershell
+        run: |
+          .\.circleci\scripts\vs_install.ps1
+      - name: Install Cuda
+        shell: bash
+        run: |
+          .circleci/scripts/windows_cuda_install.sh
+      - name: Install Cudnn
+        shell: bash
+        run: |
+          .circleci/scripts/windows_cudnn_install.sh
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+          path: C:\${{ github.run_id }}\build-results
+      - name: Check build-results folder
+        shell: powershell
+        run: |
+          tree /F C:\$Env:GITHUB_RUN_ID\build-results
+      # Needed for coverage in win-test.sh
+      - uses: actions/setup-python@v2
+        name: Setup Python3
+        with:
+          python-version: '3.x'
+      - name: Test
+        shell: bash
+        env:
+          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
+        # Time out the test phase after 3.5 hours
+        timeout-minutes: 210
+        run: |
+          .jenkins/pytorch/win-test.sh
+      - name: Zip JSONs for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.8xlarge.nvidia.gpu'
+        shell: powershell
+        run: |
+          # -ir => recursive include all files in pattern
+          7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Downloaded JSONs on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: warn
+          path:
+            test-jsons-*.zip
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.8xlarge.nvidia.gpu'
+        shell: powershell
+        run: |
+          # -ir => recursive include all files in pattern