mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/60711 We already build the docs on each PR; this adds a step to push the relevant folder of the docs (we build the entire website for pytorch.github.io, which clocks in at around 500 MB, but we really only need the "master" docs, not every version. The master docs by themselves are around 50 MB, which is more reasonable). It uses the same S3 bucket as the artifacts but places the items at the `pytorch/pytorch/pr-previews/<pr number>` prefix. The bucket has a rule to expire resources in that prefix after 1 month. On the AWS side the bucket has static hosting enabled with CloudFront directing to the docs preview prefix, so you can see the output at `https://d28slxzaq48q8t.cloudfront.net/<pr number>/`, e.g. https://d28slxzaq48q8t.cloudfront.net/60711/. For advertising we could link this on the HUD PR page as well as in the Dr. CI comment. We could add a CNAME on CloudFront to make this be `pr-preview.pytorch.org/<pr number>` or something, but having random PRs be able to host content on the pytorch.org domain seems sketchy. Test Plan: Imported from OSS Reviewed By: ZolotukhinM Differential Revision: D29398818 Pulled By: driazati fbshipit-source-id: 24032854d83815853b3650d8e54f60b684707f76
143 lines
4.6 KiB
Python
Executable File
143 lines
4.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import yaml
|
|
import textwrap
|
|
import subprocess
|
|
import pathlib
|
|
import argparse
|
|
|
|
from typing import Dict, List, Any
|
|
|
|
|
|
# Repository root: this script lives three directories below it.
REPO_ROOT = pathlib.Path(__file__).parents[2]
# CircleCI config that gets rewritten in place by this tool.
CONFIG_YML = REPO_ROOT.joinpath(".circleci", "config.yml")
# GitHub Actions workflow files (deleted unless --keep-gha is passed).
WORKFLOWS_DIR = REPO_ROOT.joinpath(".github", "workflows")
|
|
|
|
|
|
# CircleCI workflows whose jobs are eligible for filtering; any workflow not
# listed here is dropped from the rewritten config entirely.
WORKFLOWS_TO_CHECK = [
    "binary_builds",
    "build",
    "master_build",
    # These are formatted slightly differently, skip them
    # "scheduled-ci",
    # "debuggable-scheduled-ci",
    # "slow-gradcheck-scheduled-ci",
    # "ecr_gc",
    # "promote",
]
|
|
|
|
|
|
def add_job(
    workflows: Dict[str, Any],
    workflow_name: str,
    type: str,  # NOTE: shadows the builtin, but renaming would break keyword callers
    job: Dict[str, Any],
    past_jobs: Dict[str, Any],
) -> None:
    """
    Add job 'job' under 'type' and 'workflow_name' to 'workflows' in place. Also
    add any dependencies listed in the job's 'requires' (they must already be
    recorded in 'past_jobs', keyed by job name).

    Raises KeyError if a required job has not been seen yet.
    """
    if workflow_name not in workflows:
        workflows[workflow_name] = {"when": "always", "jobs": []}

    # Recursively pull in dependencies first so they appear before this job.
    requires = job.get("requires", None)
    if requires is not None:
        for requirement in requires:
            dependency = past_jobs[requirement]
            add_job(
                workflows,
                dependency["workflow_name"],
                dependency["type"],
                dependency["job"],
                past_jobs,
            )

    # Fix: several requested jobs may share a dependency; without this guard
    # the shared dependency was appended once per job that required it.
    entry = {type: job}
    if entry not in workflows[workflow_name]["jobs"]:
        workflows[workflow_name]["jobs"].append(entry)
|
|
|
|
|
|
def get_filtered_circleci_config(
    workflows: Dict[str, Any], relevant_jobs: List[str]
) -> Dict[str, Any]:
    """
    Given an existing CircleCI config, remove every job that's not listed in
    'relevant_jobs'
    """
    new_workflows: Dict[str, Any] = {}
    past_jobs: Dict[str, Any] = {}
    for workflow_name, workflow in workflows.items():
        # Don't care about workflows outside the checked set, skip entirely
        if workflow_name not in WORKFLOWS_TO_CHECK:
            continue

        for job_dict in workflow["jobs"]:
            for job_type, job in job_dict.items():
                # Job doesn't have a name so it can't be handled
                if "name" not in job:
                    print("Skipping", job_type)
                    continue

                # Found a job that was specified at the CLI, add it to
                # the new result
                if job["name"] in relevant_jobs:
                    add_job(new_workflows, workflow_name, job_type, job, past_jobs)

                # Record the job in case it's needed as a dependency later
                past_jobs[job["name"]] = {
                    "workflow_name": workflow_name,
                    "type": job_type,
                    "job": job,
                }

    return new_workflows
|
|
|
|
|
|
def commit_ci(files: List[str], message: str) -> None:
    """
    Stage 'files' and commit them with 'message'.

    Refuses to run (raises RuntimeError) if anything is already staged, so the
    commit contains only the edits made by this tool. All git invocations use
    check=True so a failing git command raises CalledProcessError instead of
    being silently ignored (the original dropped git errors on the floor).
    """
    # Check that there are no other modified files than the ones edited by this
    # tool
    stdout = subprocess.run(
        ["git", "status", "--porcelain"],
        stdout=subprocess.PIPE,
        check=True,
    ).stdout.decode()
    for line in stdout.split("\n"):
        if line == "":
            continue
        # Porcelain lines for staged entries don't start with a space
        # (NOTE(review): untracked "??" entries also trip this — presumably
        # intentional, since they'd otherwise be missing from the commit)
        if line[0] != " ":
            raise RuntimeError(f"Refusing to commit while other changes are already staged: {line}")

    # Make the commit
    subprocess.run(["git", "add"] + files, check=True)
    subprocess.run(["git", "commit", "-m", message], check=True)
|
|
|
|
|
|
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="make .circleci/config.yml only have a specific set of jobs and delete GitHub actions"
    )
    parser.add_argument("--job", action="append", help="job name", default=[])
    parser.add_argument(
        "--keep-gha", action="store_true", help="don't delete GitHub actions"
    )
    parser.add_argument(
        # Fix: help text previously read "with to a do-not-merge commit"
        "--make-commit", action="store_true", help="add change to git with a do-not-merge commit"
    )
    args = parser.parse_args()

    touched_files = [CONFIG_YML]
    with open(CONFIG_YML, "r") as f:
        config_yml = yaml.safe_load(f.read())

    # Keep only the jobs requested via --job (plus their dependencies)
    config_yml["workflows"] = get_filtered_circleci_config(config_yml["workflows"], args.job)

    with open(CONFIG_YML, "w") as f:
        yaml.dump(config_yml, f)

    if not args.keep_gha:
        # Fix: iterdir() already yields full paths; the original re-joined each
        # entry against WORKFLOWS_DIR, which is a no-op for absolute paths and
        # misleading to read.
        for path in WORKFLOWS_DIR.iterdir():
            touched_files.append(path)
            path.unlink()

    if args.make_commit:
        jobs_str = '\n'.join([f" * {job}" for job in args.job])
        # NOTE(review): with multiple --job entries the interpolated jobs_str
        # lines lack the common indent, so dedent() becomes a no-op and the
        # message stays indented — preserved as-is to avoid changing output.
        message = textwrap.dedent(f"""
        [skip ci][do not merge] Edit config.yml to filter specific jobs

        Filter CircleCI to only run:
        {jobs_str}

        See [Run Specific CI Jobs](https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md#run-specific-ci-jobs) for details.
        """).strip()
        commit_ci([str(f.relative_to(REPO_ROOT)) for f in touched_files], message)
|