mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[CI] Reuse old whl (#153838)
~50% of commits on main only touch python files unrelated to the object files in the whl, meaning that we could reuse old whls and put the current commit's python files into the whl. This PR does that in CI by identifying a previous job whose artifact and whls binaries can be reused. See https://docs.google.com/document/d/1nQ1FNJqnJuSFRiM2HvQ27zg6Vm-77n7LECp30zYfTDk/edit?tab=t.icom2lesr6es for more details? To reuse: * the changed files between the whl's commit and the current commit can only be python files in test/ or torch/ and not in torch/csrc * not on main branch or release branch * ci-force-rebuild not on PR * special abort issue is closed * artifact should exist Pros: * build time -> 6 min whenever this can be done Cons: * not sure if I have the right files * version + whl name still remains the same Testing: Unfortunately this PR's changed files are not on the list of acceptable changed files for reusing the whl, so I've been mangling it on other PRs to get things like https://github.com/pytorch/pytorch/actions/runs/15119214901/job/42497650394?pr=147470 (It is enabled on linux-focal-cuda12.6-py3.10-gcc11 / build and there are changes in common_utils.py to make sure the copying of python takes effect) Pull Request resolved: https://github.com/pytorch/pytorch/pull/153838 Approved by: https://github.com/malfet
This commit is contained in:
parent
9180bb187c
commit
cc48550e6f
38
.github/actions/reuse-old-whl/action.yml
vendored
Normal file
38
.github/actions/reuse-old-whl/action.yml
vendored
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# Composite action that tries to reuse a previously built wheel instead of
# rebuilding from scratch. The heavy lifting is done by reuse_old_whl.py,
# which decides reusability and downloads the old artifact if possible.
name: Reuse old wheel if possible

description:
  Reuse old wheel if possible

inputs:
  build-environment:
    description: Build environment
    required: true
  run-id:
    description: Workflow run ID
    required: true
  github-token:
    description: GitHub token
    required: true

outputs:
  # Downstream build steps key off this to skip the actual build.
  reuse:
    description: Whether the wheel is reused or not
    value: ${{ steps.check-file-changes.outputs.reuse }}

runs:
  using: composite

  steps:
    # Check out pytorch with fetch depth 0
    # continue-on-error: a failure here must never fail the build job —
    # the worst case is simply rebuilding the wheel as before.
    - name: Check file changes
      id: check-file-changes
      shell: bash
      continue-on-error: true
      env:
        GITHUB_TOKEN: ${{ inputs.github-token }}
      run: |
        set -x
        python3 ${GITHUB_ACTION_PATH}/reuse_old_whl.py \
          --build-environment "${{ inputs.build-environment }}" \
          --run-id "${{ inputs.run-id }}" \
          --github-ref "${{ github.ref }}"
|
||||
280
.github/actions/reuse-old-whl/reuse_old_whl.py
vendored
Normal file
280
.github/actions/reuse-old-whl/reuse_old_whl.py
vendored
Normal file
|
|
@ -0,0 +1,280 @@
|
|||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any, cast, Optional
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
FORCE_REBUILD_LABEL = "ci-force-rebuild"
|
||||
|
||||
|
||||
@lru_cache
def get_merge_base() -> str:
    """Return the merge-base commit of HEAD and origin/main (cached)."""
    output = subprocess.check_output(
        ["git", "merge-base", "HEAD", "origin/main"],
        text=True,
        stderr=subprocess.DEVNULL,
    )
    return output.strip()
|
||||
|
||||
|
||||
@lru_cache
def get_head_sha() -> str:
    """Return the SHA of the current HEAD commit (cached)."""
    output = subprocess.check_output(
        ["git", "rev-parse", "HEAD"],
        text=True,
        stderr=subprocess.DEVNULL,
    )
    return output.strip()
|
||||
|
||||
|
||||
def is_main_branch() -> bool:
    """Report whether this checkout is on the main branch.

    Deliberately hard-coded to False while the reuse flow is being tested
    on main; the real merge-base comparison is kept below, commented out,
    for when testing is done.
    """
    return False
    # Testing on main branch for now
    # print(
    #     f"Checking if we are on main branch: merge base {get_merge_base()}, head {get_head_sha()}"
    # )
    # return get_merge_base() == get_head_sha()
|
||||
|
||||
|
||||
def query_github_api(url: str) -> Any:
    """GET *url* from the GitHub API and return the decoded JSON body.

    Requires a GITHUB_TOKEN environment variable (raises KeyError if unset).
    No HTTP-status checking is done; callers inspect the returned JSON.
    """
    token = os.environ["GITHUB_TOKEN"]
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"Bearer {token}",
    }
    return requests.get(url, headers=headers).json()
|
||||
|
||||
|
||||
@lru_cache
def check_labels_for_pr() -> bool:
    """Return True if any PR containing HEAD carries the force-rebuild label.

    Looks up the PRs associated with the current commit via the GitHub API
    and scans each PR's labels for FORCE_REBUILD_LABEL.
    """
    head_sha = get_head_sha()
    url = f"https://api.github.com/repos/pytorch/pytorch/commits/{head_sha}/pulls"
    response = query_github_api(url)

    print(
        f"Found {len(response)} PRs for commit {head_sha}: {[pr['number'] for pr in response]}"
    )
    for pr in response:
        for label in pr.get("labels", []):
            if label["name"] == FORCE_REBUILD_LABEL:
                print(f"Found label {FORCE_REBUILD_LABEL} in PR {pr['number']}.")
                return True
    return False
|
||||
|
||||
|
||||
def check_issue_open() -> bool:
    """Check whether config issue #153759 is open.

    That issue acts as a kill switch for quickly forcing everyone to do a
    full build instead of reusing old wheels.
    """
    url = "https://api.github.com/repos/pytorch/pytorch/issues/153759"
    response = query_github_api(url)
    is_open = response.get("state") == "open"
    if is_open:
        print("Issue #153759 is open.")
    else:
        print("Issue #153759 is not open.")
    return is_open
|
||||
|
||||
|
||||
def get_workflow_id(run_id: str) -> Optional[str]:
    """Return the workflow ID for the given run ID, or None if not found."""
    url = f"https://api.github.com/repos/pytorch/pytorch/actions/runs/{run_id}"
    response = query_github_api(url)
    if "workflow_id" not in response:
        print("No workflow ID found.")
        return None
    print(f"Found workflow ID for run ID {run_id}: {response['workflow_id']}")
    return cast(str, response["workflow_id"])
|
||||
|
||||
|
||||
def ok_changed_file(file: str) -> bool:
    """Return True if *file* is allowed to change while reusing the old whl.

    Only python files are reusable-safe, and only those under torch/
    (excluding torch/csrc/, which affects compiled binaries) or test/.
    """
    if not file.endswith(".py"):
        return False
    if file.startswith("torch/") and not file.startswith("torch/csrc/"):
        return True
    return file.startswith("test/")
|
||||
|
||||
|
||||
def check_changed_files(sha: str) -> bool:
    """Return True if every file changed between *sha* and HEAD is allowed.

    The old whl can only be reused when all changes are python-only files
    that do not affect the compiled binaries (see ok_changed_file).

    Args:
        sha: Base commit to diff HEAD against (typically the merge base).
    """
    # `git diff --name-only` emits one path per line, so parse with
    # splitlines() — split() would break file names containing spaces.
    changed_files = (
        subprocess.check_output(
            ["git", "diff", "--name-only", sha, "HEAD"],
            text=True,
            stderr=subprocess.DEVNULL,
        )
        .strip()
        .splitlines()
    )
    print(f"Checking changed files between {sha} and HEAD:")
    for file in changed_files:
        if not ok_changed_file(file):
            print(f"  File {file} is not allowed to be changed.")
            return False
        else:
            print(f"  File {file} is allowed to be changed.")
    return True
|
||||
|
||||
|
||||
def find_old_whl(workflow_id: str, build_environment: str, sha: str) -> bool:
    """Search s3 for an old whl artifact built at *sha* and download it.

    Queries GitHub for workflow runs of *workflow_id* on main at *sha*,
    then probes the gha-artifacts s3 bucket for each run.  On success the
    artifact is saved locally as artifacts.zip and True is returned.
    """
    if build_environment is None:
        print("BUILD_ENVIRONMENT is not set.")
        return False
    print(f"SHA: {sha}, workflow_id: {workflow_id}")

    workflow_runs = query_github_api(
        f"https://api.github.com/repos/pytorch/pytorch/actions/workflows/{workflow_id}/runs?head_sha={sha}&branch=main&per_page=100"
    )
    if workflow_runs.get("total_count", 0) == 0:
        print("No workflow runs found.")
        return False

    for run in workflow_runs.get("workflow_runs", []):
        # Look in s3 for the old whl
        run_id = run["id"]
        try:
            url = f"https://gha-artifacts.s3.amazonaws.com/pytorch/pytorch/{run_id}/{build_environment}/artifacts.zip"
            print(f"Checking for old whl at {url}")
            response = requests.get(
                url,
            )
            if response.status_code != 200:
                continue
            with open("artifacts.zip", "wb") as f:
                f.write(response.content)
            print(f"Found old whl file from s3: {url}")
            return True
        except requests.RequestException as e:
            # Network trouble for one run should not abort the whole search.
            print(f"Error checking for old whl: {e}")
            continue
    return False
|
||||
|
||||
|
||||
def unzip_artifact_and_replace_files() -> None:
    """Inject the current checkout's python files into the downloaded whl.

    Expects artifacts.zip (downloaded by find_old_whl) in the cwd.  For each
    wheel inside the artifact: unpack it, rsync the local torch/ tree over
    its contents, repack it, then rezip the whole artifact in place.
    Shells out to unzip/rsync/zip/mv/rm, so those tools must be on PATH.
    """
    # Unzip the artifact and replace files
    subprocess.check_output(
        ["unzip", "-o", "artifacts.zip", "-d", "artifacts"],
    )
    os.remove("artifacts.zip")

    # Rename wheel into zip
    # NOTE(review): a .whl is zip-formatted, so renaming lets the zip tools
    # operate on it directly.
    wheel_path = Path("artifacts/dist").glob("*.whl")
    for path in wheel_path:
        new_path = path.with_suffix(".zip")
        os.rename(path, new_path)
        print(f"Renamed {path} to {new_path}")
        print(new_path.stem)
        # Unzip the wheel
        subprocess.check_output(
            ["unzip", "-o", new_path, "-d", f"artifacts/dist/{new_path.stem}"],
        )
        # Copy python files into the artifact
        subprocess.check_output(
            ["rsync", "-avz", "torch", f"artifacts/dist/{new_path.stem}"],
        )

        # Zip the wheel back
        # Run zip from inside the extracted dir so archive paths stay relative.
        subprocess.check_output(
            ["zip", "-r", f"{new_path.stem}.zip", "."],
            cwd=f"artifacts/dist/{new_path.stem}",
        )
        subprocess.check_output(
            [
                "mv",
                f"artifacts/dist/{new_path.stem}/{new_path.stem}.zip",
                f"artifacts/dist/{new_path.stem}.whl",
            ],
        )

        # Remove the extracted folder
        subprocess.check_output(
            ["rm", "-rf", f"artifacts/dist/{new_path.stem}"],
        )

    # Rezip the artifact
    subprocess.check_output(["zip", "-r", "artifacts.zip", "."], cwd="artifacts")
    subprocess.check_output(
        ["mv", "artifacts/artifacts.zip", "."],
    )
    return None
|
||||
|
||||
|
||||
def set_output() -> None:
    """Mark this job's `reuse` output as true for downstream workflow steps.

    Appends to the GITHUB_OUTPUT file when available, otherwise falls back
    to the legacy ::set-output workflow command.
    """
    # Disable for now so we can monitor first
    # pass
    out_path = os.getenv("GITHUB_OUTPUT")
    if out_path:
        with open(str(out_path), "a") as env:
            print("reuse=true", file=env)
    else:
        print("::set-output name=reuse::true")
|
||||
|
||||
|
||||
def parse_args(argv: Optional[list] = None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse instead of
            sys.argv[1:].  Defaults to None (use sys.argv), so existing
            callers are unaffected; passing a list makes the parser
            directly testable.

    Returns:
        Namespace with run_id, build_environment, and github_ref.
    """
    parser = argparse.ArgumentParser(description="Check for old whl files.")
    parser.add_argument("--run-id", type=str, required=True, help="Workflow ID")
    parser.add_argument(
        "--build-environment", type=str, required=True, help="Build environment"
    )
    parser.add_argument(
        "--github-ref",
        type=str,
    )
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def can_reuse_whl(args: argparse.Namespace) -> bool:
    """Decide whether an old whl can be reused instead of rebuilding.

    Runs a chain of guard checks, each of which independently forces a
    rebuild (returns False); only if all pass is the old whl reusable.
    Each guard prints its reason so the CI log explains the decision.
    """
    # Branch/ref guard temporarily disabled so the flow can be tested on
    # main; re-enable once monitoring is done.
    # if is_main_branch() or (
    #     args.github_ref
    #     and any(
    #         args.github_ref.startswith(x)
    #         for x in ["refs/heads/release", "refs/tags/v", "refs/heads/main"]
    #     )
    # ):
    #     print("On main branch or release branch, rebuild whl")
    #     return False

    # Guard: PR explicitly requests a full rebuild via label.
    if check_labels_for_pr():
        print(f"Found {FORCE_REBUILD_LABEL} label on PR, rebuild whl")
        return False

    # Guard: global kill-switch issue forces everyone to rebuild.
    if check_issue_open():
        print("Issue #153759 is open, rebuild whl")
        return False

    # Guard: any non-python (binary-affecting) change forces a rebuild.
    if not check_changed_files(get_merge_base()):
        print("Cannot use old whl due to the changed files, rebuild whl")
        return False

    workflow_id = get_workflow_id(args.run_id)
    if workflow_id is None:
        print("No workflow ID found, rebuild whl")
        return False

    # Guard: a reusable artifact must actually exist on s3 at the merge base.
    if not find_old_whl(workflow_id, args.build_environment, get_merge_base()):
        print("No old whl found, rebuild whl")
        # TODO: go backwards from merge base to find more runs
        return False

    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
    args = parse_args()

    # If reusable, swap the current python files into the old artifact and
    # signal the workflow (via the `reuse` output) to skip the build.
    if can_reuse_whl(args):
        print("Reusing old whl")
        unzip_artifact_and_replace_files()
        set_output()
|
||||
31
.github/workflows/_linux-build.yml
vendored
31
.github/workflows/_linux-build.yml
vendored
|
|
@ -93,6 +93,14 @@ on:
|
|||
type: number
|
||||
default: 1
|
||||
|
||||
allow-reuse-old-whl:
|
||||
description: |
|
||||
If set, the build try to pull an old wheel from s3 that was built on a
|
||||
commit with no cpp changes from this commit
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
secrets:
|
||||
HUGGING_FACE_HUB_TOKEN:
|
||||
required: false
|
||||
|
|
@ -150,6 +158,15 @@ jobs:
|
|||
role-session-name: gha-linux-build
|
||||
aws-region: us-east-1
|
||||
|
||||
- name: Check if can use old whl build
|
||||
id: use-old-whl
|
||||
uses: ./.github/actions/reuse-old-whl
|
||||
if: ${{ inputs.allow-reuse-old-whl && github.event_name == 'push' }}
|
||||
with:
|
||||
build-environment: ${{ inputs.build-environment }}
|
||||
run-id: ${{ github.run_id }}
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Calculate docker image
|
||||
id: calculate-docker-image
|
||||
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||
|
|
@ -159,7 +176,7 @@ jobs:
|
|||
|
||||
- name: Use following to pull public copy of the image
|
||||
id: print-ghcr-mirror
|
||||
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
|
||||
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.use-old-whl.outputs.reuse != 'true'
|
||||
env:
|
||||
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||
shell: bash
|
||||
|
|
@ -169,7 +186,7 @@ jobs:
|
|||
|
||||
- name: Pull docker image
|
||||
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
||||
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
|
||||
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.use-old-whl.outputs.reuse != 'true'
|
||||
with:
|
||||
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||
|
||||
|
|
@ -218,14 +235,14 @@ jobs:
|
|||
- name: Download pytest cache
|
||||
uses: ./.github/actions/pytest-cache-download
|
||||
continue-on-error: true
|
||||
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
|
||||
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.use-old-whl.outputs.reuse != 'true'
|
||||
with:
|
||||
cache_dir: .pytest_cache
|
||||
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
|
||||
s3_bucket: ${{ inputs.s3-bucket }}
|
||||
|
||||
- name: Build
|
||||
if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
|
||||
if: (steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == '') && steps.use-old-whl.outputs.reuse != 'true'
|
||||
id: build
|
||||
env:
|
||||
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
|
||||
|
|
@ -329,13 +346,13 @@ jobs:
|
|||
kill "$MONITOR_SCRIPT_PID"
|
||||
|
||||
- name: Archive artifacts into zip
|
||||
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
|
||||
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && steps.use-old-whl.outputs.reuse != 'true'
|
||||
run: |
|
||||
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .additional_ci_files
|
||||
|
||||
- name: Store PyTorch Build Artifacts on S3
|
||||
uses: seemethere/upload-artifact-s3@baba72d0712b404f646cebe0730933554ebce96a # v5.1.0
|
||||
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
|
||||
if: inputs.build-generates-artifacts && (steps.build.outcome != 'skipped' || steps.use-old-whl.outputs.reuse == 'true') && inputs.build-environment != 'linux-s390x-binary-manywheel'
|
||||
with:
|
||||
name: ${{ inputs.build-environment }}
|
||||
retention-days: 14
|
||||
|
|
@ -345,7 +362,7 @@ jobs:
|
|||
|
||||
- name: Store PyTorch Build Artifacts for s390x
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.build-environment == 'linux-s390x-binary-manywheel'
|
||||
if: inputs.build-generates-artifacts && (steps.build.outcome != 'skipped' || steps.use-old-whl.outputs.reuse == 'true') && inputs.build-environment == 'linux-s390x-binary-manywheel'
|
||||
with:
|
||||
name: ${{ inputs.build-environment }}
|
||||
retention-days: 14
|
||||
|
|
|
|||
2
.github/workflows/pull.yml
vendored
2
.github/workflows/pull.yml
vendored
|
|
@ -296,6 +296,7 @@ jobs:
|
|||
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
]}
|
||||
allow-reuse-old-whl: true
|
||||
secrets: inherit
|
||||
|
||||
linux-focal-cuda12_6-py3_10-gcc11-test:
|
||||
|
|
@ -469,6 +470,7 @@ jobs:
|
|||
{ include: [
|
||||
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
]}
|
||||
allow-reuse-old-whl: true
|
||||
secrets: inherit
|
||||
|
||||
linux-focal-cuda12_6-py3_10-gcc11-sm89-test:
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user