[1/2] Add summary report for vllm build (#161565)

Demo Run https://github.com/pytorch/pytorch/actions/runs/17259533323?pr=161565 <img width="1538" height="720" alt="image" src="https://github.com/user-attachments/assets/64f6d7b4-cac6-4c12-863c-b15514bb8810" /> Pull Request resolved: https://github.com/pytorch/pytorch/pull/161565 Approved by: https://github.com/huydhn
2025-12-06 12:20:52 +01:00 · 2025-08-27 19:33:04 -07:00 · 2025-08-27 19:33:04 -07:00 · c83b43d7a8
commit c83b43d7a8
parent d3d9eb4777
8 changed files with 232 additions and 9 deletions
--- a/.ci/lumen_cli/cli/lib/common/gh_summary.py
+++ b/.ci/lumen_cli/cli/lib/common/gh_summary.py
@ -0,0 +1,143 @@
 from __future__ import annotations
 import logging
 import os
 import textwrap
 from pathlib import Path
 from typing import TYPE_CHECKING
 from cli.lib.common.utils import get_wheels
 from jinja2 import Template
 if TYPE_CHECKING:
    from collections.abc import Iterable, Mapping
 logger = logging.getLogger(__name__)
 # Markdown section template: a level-2 heading built from `title`, followed by
 # a fenced code block whose info string is `lang` and whose body is `content`.
 # The literal is passed through textwrap.dedent before Jinja compiles it.
 _TPL_CONTENT = Template(
    textwrap.dedent("""\
    ## {{ title }}
    ```{{ lang }}
    {{ content }}
    ```
 """)
 )
 # Markdown section template: a level-2 heading built from `title`, followed by
 # one "- pkg: relpath" bullet per entry in `items` (each entry must expose
 # `pkg` and `relpath`). The for/else branch renders a "no item found"
 # placeholder when `items` yields nothing.
 _TPL_LIST_ITEMS = Template(
    textwrap.dedent("""\
    ## {{ title }}
    {% for it in items %}
    - {{ it.pkg }}: {{ it.relpath }}
    {% else %}
    _(no item found)_
    {% endfor %}
    """)
 )
 # Markdown table template: a header row joined from `cols`, one `---`
 # separator cell per column, then one row per entry in `rows`. Cells are read
 # with r.get(c, "") so a row lacking a column renders an empty cell. When
 # `rows` is falsy a "no data" placeholder is rendered instead of a table.
 _TPL_TABLE = Template(
    textwrap.dedent("""\
    {%- if rows %}
    | {{ cols | join(' | ') }} |
    |{%- for _ in cols %} --- |{%- endfor %}
    {%- for r in rows %}
    | {%- for c in cols %} {{ r.get(c, "") }} |{%- endfor %}
    {%- endfor %}
    {%- else %}
    _(no data)_
    {%- endif %}
 """)
 )
 def gh_summary_path() -> Path | None:
    """Locate the GitHub step summary file.

    Reads the ``GITHUB_STEP_SUMMARY`` environment variable and wraps its value
    in a ``Path``. Yields ``None`` when the variable is missing or empty.
    """
    raw_location = os.environ.get("GITHUB_STEP_SUMMARY")
    if not raw_location:
        return None
    return Path(raw_location)
 def write_gh_step_summary(md: str, *, append_content: bool = True) -> bool:
    """
    Emit Markdown into the file named by GITHUB_STEP_SUMMARY.

    The text is dedented, stripped of surrounding whitespace and terminated
    with a single newline before it is written as UTF-8.

    Args:
        md: Markdown text to write.
        append_content: when True (the default) the text is appended to the
            file; when False the file is overwritten.

    Returns:
        True once the text has been written, False when no summary path is
        available (for example when running outside GitHub Actions).
    """
    summary_file = gh_summary_path()
    if summary_file:
        payload = textwrap.dedent(md).strip() + "\n"
        open_mode = "a" if append_content else "w"
        with summary_file.open(open_mode, encoding="utf-8") as out:
            out.write(payload)
        return True
    logger.info("[gh-summary] GITHUB_STEP_SUMMARY not set, skipping write.")
    return False
 def md_heading(text: str, level: int = 2) -> str:
    """Build a Markdown heading line; ``level`` is clamped into the 1-6 range."""
    capped = min(level, 6)
    depth = max(1, capped)
    marker = "#" * depth
    return f"{marker} {text}\n"
 def md_details(summary: str, content: str) -> str:
    """Wrap ``content`` in a collapsible ``<details>`` element labelled by ``summary``."""
    pieces = [
        "<details>",
        f"<summary>{summary}</summary>",
        "",  # blank line between the summary tag and the content
        f"{content}",
        "",  # blank line between the content and the closing tag
        "</details>",
        "",  # produces the trailing newline
    ]
    return "\n".join(pieces)
 def summarize_content_from_file(
    output_dir: Path,
    freeze_file: str,
    title: str = "Content from file",
    code_lang: str = "",  # e.g. "text" or "ini"
 ) -> bool:
    """
    Publish the contents of ``output_dir / freeze_file`` to the step summary.

    The file is read as UTF-8, stripped, rendered as a titled code block via
    ``render_content`` and handed to ``write_gh_step_summary``.

    Returns:
        False when the file does not exist; otherwise whatever
        ``write_gh_step_summary`` returns.
    """
    source = Path(output_dir) / freeze_file
    if source.exists():
        body = source.read_text(encoding="utf-8").strip()
        rendered = render_content(body, title=title, lang=code_lang)
        return write_gh_step_summary(rendered)
    return False
 def summarize_wheels(path: Path, title: str = "Wheels", max_depth: int = 3):
    """
    Publish the wheels found under ``path`` to the step summary as a list.

    Wheels are collected with ``get_wheels(path, max_depth=max_depth)`` and
    rendered through ``render_list`` under ``title``.

    Returns:
        False when no wheels are found; otherwise whatever
        ``write_gh_step_summary`` returns.
    """
    wheels = get_wheels(path, max_depth=max_depth)
    if wheels:
        return write_gh_step_summary(render_list(wheels, title=title))
    return False
 def md_kv_table(rows: Iterable[Mapping[str, str | int | float]]) -> str:
    """
    Render a list of dicts as a Markdown table using the Jinja table template.

    Args:
        rows: mappings of column name to cell value. Rows may have different
            keys; the template reads cells with ``r.get(c, "")``, so a missing
            key renders as an empty cell.

    Returns:
        The rendered Markdown, stripped and terminated with one newline.
        Columns appear in the order their keys are first seen across ``rows``.
    """
    rows = list(rows)  # materialise once: iterated here and again by the template
    # dict.fromkeys de-duplicates while keeping first-seen order. A set would
    # make the column order arbitrary and unstable across interpreter runs.
    cols = list(dict.fromkeys(k for r in rows for k in r))
    return _TPL_TABLE.render(cols=cols, rows=rows).strip() + "\n"
 def render_list(
    items: Iterable[Mapping[str, str]],
    *,
    title: str = "List",
 ) -> str:
    """
    Render ``items`` as a Markdown bullet list under a level-2 ``title`` heading.

    Args:
        items: entries exposing ``pkg`` and ``relpath`` (the list template
            reads ``it.pkg`` and ``it.relpath``), e.g. the dicts produced by
            ``get_wheels``.
        title: heading text placed above the list.

    Returns:
        The Markdown produced by the list template.
    """
    return _TPL_LIST_ITEMS.render(title=title, items=items)
 def render_content(
    content: str,
    *,
    title: str = "Content",
    lang: str = "text",
 ) -> str:
    """
    Render ``content`` as a fenced code block under a level-2 ``title`` heading.

    ``lang`` is used as the info string of the code fence.
    """
    return _TPL_CONTENT.render(title=title, content=content, lang=lang)
--- a/.ci/lumen_cli/cli/lib/common/git_helper.py
+++ b/.ci/lumen_cli/cli/lib/common/git_helper.py
@ -45,7 +45,7 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
        # Checkout pinned commit
        commit = get_post_build_pinned_commit(target)
-        logger.info("Checking out pinned commit %s", commit)
+        logger.info("Checking out pinned %s commit %s", target, commit)
        r.git.checkout(commit)
        # Update submodules if requested
@ -55,7 +55,7 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
                sm.update(init=True, recursive=True, progress=PrintProgress())
        logger.info("Successfully cloned %s", target)
-        return r
+        return r, commit
    except GitCommandError as e:
        logger.error("Git operation failed: %s", e)
--- a/.ci/lumen_cli/cli/lib/common/pip_helper.py
+++ b/.ci/lumen_cli/cli/lib/common/pip_helper.py
@ -4,7 +4,7 @@ import shlex
 import shutil
 import sys
 from collections.abc import Iterable
-from importlib.metadata import PackageNotFoundError, version
+from importlib.metadata import PackageNotFoundError, version  # noqa: UP035
 from typing import Optional, Union
 from cli.lib.common.utils import run_command
--- a/.ci/lumen_cli/cli/lib/common/utils.py
+++ b/.ci/lumen_cli/cli/lib/common/utils.py
@ -8,6 +8,7 @@ import shlex
 import subprocess
 import sys
 from contextlib import contextmanager
 from pathlib import Path
 from typing import Optional
@ -115,3 +116,24 @@ def working_directory(path: str):
        yield
    finally:
        os.chdir(prev_cwd)
 def get_wheels(
    output_dir: Path,
    max_depth: Optional[int] = None,
 ) -> list[dict[str, str]]:
    """
    Return the wheels found under the given output directory.

    Args:
        output_dir: directory to search; a missing directory yields ``[]``.
        max_depth: how many directory levels below ``output_dir`` to search
            (0 means only ``output_dir`` itself). ``None`` means no limit.

    Returns:
        One ``{"pkg": ..., "relpath": ...}`` dict per ``*.whl`` file. ``pkg``
        is the part of the file name before the first ``-`` and ``relpath`` is
        the path relative to ``output_dir``. Directories are visited in sorted
        order and files are sorted within each directory.
    """
    root = Path(output_dir)
    if not root.exists():
        return []
    items: list[dict[str, str]] = []
    for dirpath, dirnames, filenames in os.walk(root):
        current = Path(dirpath)
        depth = len(current.relative_to(root).parts)
        # Sorting in place makes os.walk visit sub-directories deterministically.
        dirnames.sort()
        if max_depth is not None and depth >= max_depth:
            # Nothing deeper can qualify, so stop os.walk from descending.
            del dirnames[:]
            if depth > max_depth:
                continue
        for fname in sorted(filenames):
            if fname.endswith(".whl"):
                items.append(
                    {
                        "pkg": fname.split("-")[0],
                        "relpath": str((current / fname).relative_to(root)),
                    }
                )
    return items
--- a/.ci/lumen_cli/cli/lib/core/vllm/lib.py
+++ b/.ci/lumen_cli/cli/lib/core/vllm/lib.py
@ -1,13 +1,27 @@
 import logging
 import os
 import textwrap
 from typing import Any
 from cli.lib.common.gh_summary import write_gh_step_summary
 from cli.lib.common.git_helper import clone_external_repo
 from cli.lib.common.pip_helper import pip_install_packages
 from cli.lib.common.utils import run_command, temp_environ, working_directory
 from jinja2 import Template
 logger = logging.getLogger(__name__)
 # Markdown header for the vLLM build summary: a level-2 title, a link to the
 # vLLM commit built from `vllm_commit`, and, only when `torch_sha` is truthy,
 # a link to the PyTorch commit built from `torch_sha`.
 _TPL_VLLM_INFO = Template(
    textwrap.dedent("""\
    ##  Vllm against Pytorch CI Test Summary
    **Vllm Commit**: [{{ vllm_commit }}](https://github.com/vllm-project/vllm/commit/{{ vllm_commit }})
    {%- if torch_sha %}
    **Pytorch Commit**: [{{ torch_sha }}](https://github.com/pytorch/pytorch/commit/{{ torch_sha }})
    {%- endif %}
 """)
 )
 def sample_vllm_test_library():
    """
@ -214,12 +228,13 @@ def run_test_plan(
 def clone_vllm(dst: str = "vllm"):
-    clone_external_repo(
+    _, commit = clone_external_repo(
        target="vllm",
        repo="https://github.com/vllm-project/vllm.git",
        dst=dst,
        update_submodules=True,
    )
    return commit
 def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) -> str:
@ -230,3 +245,12 @@ def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) ->
    for k in sorted(mapping, key=len, reverse=True):
        step = step.replace(k, mapping[k])
    return step
 def summarize_build_info(vllm_commit: str) -> bool:
    """
    Write the vLLM build header to the GitHub step summary.

    Renders the build-info template with ``vllm_commit`` and the value of the
    ``GITHUB_SHA`` environment variable (which may be unset), strips the
    result and appends a single newline.

    Returns:
        Whatever ``write_gh_step_summary`` returns.
    """
    rendered = _TPL_VLLM_INFO.render(
        vllm_commit=vllm_commit,
        torch_sha=os.getenv("GITHUB_SHA"),
    )
    return write_gh_step_summary(rendered.strip() + "\n")
--- a/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py
+++ b/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py
@ -13,6 +13,11 @@ from cli.lib.common.envs_helper import (
    env_str_field,
    with_params_help,
 )
 from cli.lib.common.gh_summary import (
    gh_summary_path,
    summarize_content_from_file,
    summarize_wheels,
 )
 from cli.lib.common.path_helper import (
    copy,
    ensure_dir_exists,
@ -21,7 +26,7 @@ from cli.lib.common.path_helper import (
    is_path_exist,
 )
 from cli.lib.common.utils import run_command
-from cli.lib.core.vllm.lib import clone_vllm
+from cli.lib.core.vllm.lib import clone_vllm, summarize_build_info
 logger = logging.getLogger(__name__)
@ -153,18 +158,43 @@ class VllmBuildRunner(BaseRunner):
        """
        inputs = VllmBuildParameters()
        logger.info("Running vllm build with inputs: %s", inputs)
-        clone_vllm()
+        vllm_commit = clone_vllm()
        self.cp_dockerfile_if_exist(inputs)
        # cp torch wheels from root direct to vllm workspace if exist
        self.cp_torch_whls_if_exist(inputs)
-        ensure_dir_exists(inputs.output_dir)
+        # make sure the output dir to store the build artifacts exist
        ensure_dir_exists(Path(inputs.output_dir))
        cmd = self._generate_docker_build_cmd(inputs)
        logger.info("Running docker build: \n %s", cmd)
        try:
            run_command(cmd, cwd="vllm", env=os.environ.copy())
        finally:
            self.genearte_vllm_build_summary(vllm_commit, inputs)
    def genearte_vllm_build_summary(
        self, vllm_commit: str, inputs: VllmBuildParameters
    ):
        """
        Write the vLLM build report to the GitHub step summary.

        Does nothing except log when no step-summary path is available.
        Otherwise it writes, in order: the build header for ``vllm_commit``,
        the contents of ``<output_dir>/wheels/build_summary.txt``, the wheels
        found under ``inputs.torch_whls_path``, and the wheels found under
        ``<output_dir>/wheels``.

        Args:
            vllm_commit: the vLLM commit that was checked out for the build.
            inputs: build parameters; ``output_dir`` and ``torch_whls_path``
                are read.
        """
        if not gh_summary_path():
            logger.info("Skipping, not detect GH Summary env var....")
            return
        logger.info("Generate GH Summary ...")
        # summarize vllm build info
        summarize_build_info(vllm_commit)
        # summarize vllm build artifacts
        # Coerce to Path, as is done before ensure_dir_exists(), so that a
        # plain-string output_dir does not break the "/" join.
        vllm_artifact_dir = Path(inputs.output_dir) / "wheels"
        summarize_content_from_file(
            vllm_artifact_dir,
            "build_summary.txt",
            title="Vllm build env pip package summary",
        )
        summarize_wheels(
            inputs.torch_whls_path, max_depth=3, title="Torch Wheels Artifacts"
        )
        summarize_wheels(vllm_artifact_dir, max_depth=3, title="Vllm Wheels Artifacts")
    def cp_torch_whls_if_exist(self, inputs: VllmBuildParameters) -> str:
        if not inputs.use_torch_whl:
--- a/.github/actions/build-external-packages/action.yml
+++ b/.github/actions/build-external-packages/action.yml
@ -48,6 +48,7 @@ runs:
        BASE_IMAGE: ${{ inputs.docker-image }}
        BUILD_TARGETS: ${{ inputs.build-targets }}
        PARENT_OUTPUT_DIR: ${{ inputs.output-dir}}
      shell: bash
      run: |
        set -euo pipefail
--- a/.github/ci_configs/vllm/Dockerfile.tmp_vllm
+++ b/.github/ci_configs/vllm/Dockerfile.tmp_vllm
@ -176,6 +176,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Build can take a long time, and the torch nightly version fetched from url can be different in next docker stage.
 # track the nightly torch version used in the build, when we set up runtime environment we can make sure the version is the same
 RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio' > torch_build_versions.txt
 RUN cat torch_build_versions.txt
 RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio'
@ -376,6 +377,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Logging to confirm the torch versions
 RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer'
 RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm\|^flashinfer' > build_summary.txt
 ################### VLLM INSTALLED IMAGE ####################
@ -433,4 +435,5 @@ FROM scratch as export-wheels
 # Just copy the wheels we prepared in previous stages
 COPY --from=base /workspace/xformers-dist /wheels/xformers
 COPY --from=build /workspace/vllm-dist /wheels/vllm
 COPY --from=vllm-base /workspace/build_summary.txt /wheels/build_summary.txt
 COPY --from=vllm-base /workspace/wheels/flashinfer /wheels/flashinfer-python