mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Build vLLM aarch64 nightly wheels (#162664)
PyTorch has published its aarch64 nightly wheels for all CUDA versions after https://github.com/pytorch/pytorch/pull/162364. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162664. Approved by: https://github.com/atalman
This commit is contained in:
parent
543d50db2b
commit
66133b1ab7
1
.github/actionlint.yaml
vendored
1
.github/actionlint.yaml
vendored
|
|
@ -21,6 +21,7 @@ self-hosted-runner:
|
||||||
- linux.arm64.2xlarge.ephemeral
|
- linux.arm64.2xlarge.ephemeral
|
||||||
- linux.arm64.m7g.4xlarge
|
- linux.arm64.m7g.4xlarge
|
||||||
- linux.arm64.m7g.4xlarge.ephemeral
|
- linux.arm64.m7g.4xlarge.ephemeral
|
||||||
|
- linux.arm64.r7g.12xlarge.memory
|
||||||
- linux.4xlarge.nvidia.gpu
|
- linux.4xlarge.nvidia.gpu
|
||||||
- linux.8xlarge.nvidia.gpu
|
- linux.8xlarge.nvidia.gpu
|
||||||
- linux.16xlarge.nvidia.gpu
|
- linux.16xlarge.nvidia.gpu
|
||||||
|
|
|
||||||
29
.github/ci_configs/vllm/Dockerfile.tmp_vllm
vendored
29
.github/ci_configs/vllm/Dockerfile.tmp_vllm
vendored
|
|
@ -82,16 +82,10 @@ RUN if command -v apt-get >/dev/null; then \
|
||||||
apt-get update -y \
|
apt-get update -y \
|
||||||
&& apt-get install -y ccache software-properties-common git curl wget sudo vim; \
|
&& apt-get install -y ccache software-properties-common git curl wget sudo vim; \
|
||||||
else \
|
else \
|
||||||
dnf install -y git curl wget sudo vim; \
|
dnf install -y git curl wget sudo; \
|
||||||
fi \
|
fi \
|
||||||
&& python3 --version && python3 -m pip --version
|
&& python3 --version && python3 -m pip --version
|
||||||
|
|
||||||
# Workaround for https://github.com/openai/triton/issues/2507 and
|
|
||||||
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
|
|
||||||
# this won't be needed for future versions of this docker image
|
|
||||||
# or future versions of triton.
|
|
||||||
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
|
|
||||||
|
|
||||||
# Install uv for faster pip installs if it is not already installed
|
# Install uv for faster pip installs if it is not already installed
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
if ! python3 -m uv --version >/dev/null 2>&1; then \
|
if ! python3 -m uv --version >/dev/null 2>&1; then \
|
||||||
|
|
@ -220,11 +214,16 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
--mount=type=bind,source=.git,target=.git \
|
--mount=type=bind,source=.git,target=.git \
|
||||||
if [ "$USE_SCCACHE" = "1" ]; then \
|
if [ "$USE_SCCACHE" = "1" ]; then \
|
||||||
echo "Installing sccache..." \
|
echo "Installing sccache..."; \
|
||||||
&& curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
|
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
|
||||||
|
SCCACHE_ARCHIVE="sccache-v0.8.1-aarch64-unknown-linux-musl"; \
|
||||||
|
else \
|
||||||
|
SCCACHE_ARCHIVE="sccache-v0.8.1-x86_64-unknown-linux-musl"; \
|
||||||
|
fi; \
|
||||||
|
curl -L -o sccache.tar.gz "https://github.com/mozilla/sccache/releases/download/v0.8.1/${SCCACHE_ARCHIVE}.tar.gz" \
|
||||||
&& tar -xzf sccache.tar.gz \
|
&& tar -xzf sccache.tar.gz \
|
||||||
&& sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
|
&& sudo mv "${SCCACHE_ARCHIVE}"/sccache /usr/bin/sccache \
|
||||||
&& rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
|
&& rm -rf sccache.tar.gz "${SCCACHE_ARCHIVE}" \
|
||||||
&& export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
|
&& export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
|
||||||
&& export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
|
&& export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
|
||||||
&& export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
|
&& export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
|
||||||
|
|
@ -285,7 +284,7 @@ RUN if command -v apt-get >/dev/null; then \
|
||||||
&& ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
|
&& ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
|
||||||
&& curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION}; \
|
&& curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION}; \
|
||||||
else \
|
else \
|
||||||
dnf install -y git curl wget sudo vim; \
|
dnf install -y git curl wget sudo; \
|
||||||
fi \
|
fi \
|
||||||
&& python3 --version && python3 -m pip --version
|
&& python3 --version && python3 -m pip --version
|
||||||
|
|
||||||
|
|
@ -298,12 +297,6 @@ RUN echo "[INFO] Listing current directory before torch install step:" && \
|
||||||
echo "[INFO] Showing torch_build_versions.txt content:" && \
|
echo "[INFO] Showing torch_build_versions.txt content:" && \
|
||||||
cat torch_build_versions.txt
|
cat torch_build_versions.txt
|
||||||
|
|
||||||
# Workaround for https://github.com/openai/triton/issues/2507 and
|
|
||||||
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
|
|
||||||
# this won't be needed for future versions of this docker image
|
|
||||||
# or future versions of triton.
|
|
||||||
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
|
|
||||||
|
|
||||||
# Install uv for faster pip installs if it is not already installed
|
# Install uv for faster pip installs if it is not already installed
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
if ! python3 -m uv --version > /dev/null 2>&1; then \
|
if ! python3 -m uv --version > /dev/null 2>&1; then \
|
||||||
|
|
|
||||||
3
.github/scripts/prepare_vllm_wheels.sh
vendored
3
.github/scripts/prepare_vllm_wheels.sh
vendored
|
|
@ -84,6 +84,9 @@ repackage_wheel() {
|
||||||
rm -rf $package
|
rm -rf $package
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# The wheel package is required to re-package the wheels
|
||||||
|
${PYTHON_EXECUTABLE} -mpip install wheel==0.45.1
|
||||||
|
|
||||||
pushd externals/vllm/wheels
|
pushd externals/vllm/wheels
|
||||||
for package in xformers flashinfer-python vllm; do
|
for package in xformers flashinfer-python vllm; do
|
||||||
repackage_wheel $package
|
repackage_wheel $package
|
||||||
|
|
|
||||||
41
.github/workflows/build-vllm-wheel.yml
vendored
41
.github/workflows/build-vllm-wheel.yml
vendored
|
|
@ -12,6 +12,9 @@ on:
|
||||||
paths:
|
paths:
|
||||||
- .github/workflows/build-vllm-wheel.yml
|
- .github/workflows/build-vllm-wheel.yml
|
||||||
- .github/ci_commit_pins/vllm.txt
|
- .github/ci_commit_pins/vllm.txt
|
||||||
|
schedule:
|
||||||
|
# every day at 1:30PM UTC (9:30AM EDT, 6:30AM PDT)
|
||||||
|
- cron: 30 13 * * *
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||||
|
|
@ -24,21 +27,33 @@ jobs:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: [ '3.12' ]
|
python-version: [ '3.12' ]
|
||||||
# TODO (huydhn): Add cu130 https://github.com/pytorch/pytorch/pull/162000#issuecomment-3261541554
|
# TODO (huydhn): Add cu130 after https://github.com/vllm-project/vllm/issues/24464 is resolved
|
||||||
|
platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ]
|
||||||
device: [ 'cu128', 'cu129' ]
|
device: [ 'cu128', 'cu129' ]
|
||||||
runner: [ 'linux.12xlarge.memory' ]
|
|
||||||
include:
|
include:
|
||||||
- device: cu128
|
- platform: manylinux_2_28_x86_64
|
||||||
|
device: cu128
|
||||||
manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.8'
|
manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.8'
|
||||||
- device: cu129
|
runner: linux.12xlarge.memory
|
||||||
|
- platform: manylinux_2_28_x86_64
|
||||||
|
device: cu129
|
||||||
manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.9'
|
manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.9'
|
||||||
name: "Build ${{ matrix.device }} vLLM wheel"
|
runner: linux.12xlarge.memory
|
||||||
|
- platform: manylinux_2_28_aarch64
|
||||||
|
device: cu128
|
||||||
|
manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.8'
|
||||||
|
runner: linux.arm64.r7g.12xlarge.memory
|
||||||
|
- platform: manylinux_2_28_aarch64
|
||||||
|
device: cu129
|
||||||
|
manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.9'
|
||||||
|
runner: linux.arm64.r7g.12xlarge.memory
|
||||||
|
name: "Build ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}"
|
||||||
runs-on: ${{ matrix.runner }}
|
runs-on: ${{ matrix.runner }}
|
||||||
timeout-minutes: 480
|
timeout-minutes: 480
|
||||||
env:
|
env:
|
||||||
PY_VERS: ${{ matrix.python-version }}
|
PY_VERS: ${{ matrix.python-version }}
|
||||||
MANYLINUX_IMAGE: ${{ matrix.manylinux-image }}
|
MANYLINUX_IMAGE: ${{ matrix.manylinux-image }}
|
||||||
PLATFORM: 'manylinux_2_28_x86_64'
|
PLATFORM: ${{ matrix.platform }}
|
||||||
BUILD_DEVICE: ${{ matrix.device }}
|
BUILD_DEVICE: ${{ matrix.device }}
|
||||||
steps:
|
steps:
|
||||||
- name: Setup SSH (Click me for login details)
|
- name: Setup SSH (Click me for login details)
|
||||||
|
|
@ -136,7 +151,7 @@ jobs:
|
||||||
|
|
||||||
- uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
|
- uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
|
||||||
with:
|
with:
|
||||||
name: vllm-wheel-${{ matrix.device }}-${{ matrix.python-version }}-${{ env.PLATFORM }}
|
name: vllm-wheel-${{ matrix.device }}-${{ matrix.platform }}-${{ matrix.python-version }}
|
||||||
if-no-files-found: error
|
if-no-files-found: error
|
||||||
path: ${{ runner.temp }}/artifacts/externals/vllm/wheels/*.whl
|
path: ${{ runner.temp }}/artifacts/externals/vllm/wheels/*.whl
|
||||||
|
|
||||||
|
|
@ -146,15 +161,17 @@ jobs:
|
||||||
|
|
||||||
# Copied from build-triton-wheel workflow (mostly)
|
# Copied from build-triton-wheel workflow (mostly)
|
||||||
upload-wheel:
|
upload-wheel:
|
||||||
name: "Upload ${{ matrix.device }} vLLM wheel"
|
name: "Upload ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}"
|
||||||
needs:
|
needs:
|
||||||
- build-wheel
|
- build-wheel
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
|
platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ]
|
||||||
device: [ 'cu128', 'cu129' ]
|
device: [ 'cu128', 'cu129' ]
|
||||||
env:
|
env:
|
||||||
|
PLATFORM: ${{ matrix.platform }}
|
||||||
BUILD_DEVICE: ${{ matrix.device }}
|
BUILD_DEVICE: ${{ matrix.device }}
|
||||||
permissions:
|
permissions:
|
||||||
id-token: write
|
id-token: write
|
||||||
|
|
@ -190,15 +207,15 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
set -eux
|
set -eux
|
||||||
mkdir -p "${RUNNER_TEMP}/artifacts/"
|
mkdir -p "${RUNNER_TEMP}/artifacts/"
|
||||||
mv "${RUNNER_TEMP}"/artifacts-all/vllm-wheel-"${BUILD_DEVICE}"-*/* "${RUNNER_TEMP}/artifacts/"
|
mv "${RUNNER_TEMP}"/artifacts-all/vllm-wheel-"${BUILD_DEVICE}"-"${PLATFORM}"-*/* "${RUNNER_TEMP}/artifacts/"
|
||||||
|
|
||||||
- name: Set DRY_RUN (only for tagged pushes)
|
- name: Set DRY_RUN
|
||||||
if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) }}
|
if: ${{ (github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v'))) || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
|
echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
|
||||||
|
|
||||||
- name: Set UPLOAD_CHANNEL (only for tagged pushes)
|
- name: Set UPLOAD_CHANNEL
|
||||||
if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') }}
|
if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') }}
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user