pytorch/.github/scripts/generate_binary_build_matrix.py
atalman 995fae6060 Move small pypi build as default for linux cuda 12.1 (#114281)
This is first PR to resolve: https://github.com/pytorch/pytorch/issues/113972
Move our small wheel build as default
Test:
```
pip3 install --no-cache-dir --pre torch-2.2.0.dev20231121%2Bcu121-cp310-cp310-linux_x86_64.whl  --index-url https://download.pytorch.org/whl/nightly/cu121
Looking in indexes: https://download.pytorch.org/whl/nightly/cu121
Processing ./torch-2.2.0.dev20231121%2Bcu121-cp310-cp310-linux_x86_64.whl
Collecting filelock (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/filelock-3.9.0-py3-none-any.whl (9.7 kB)
Collecting typing-extensions>=4.8.0 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/typing_extensions-4.8.0-py3-none-any.whl (31 kB)
Collecting sympy (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/sympy-1.11.1-py3-none-any.whl (6.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.5/6.5 MB 253.4 MB/s eta 0:00:00
Collecting networkx (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/networkx-3.0rc1-py3-none-any.whl (2.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 387.1 MB/s eta 0:00:00
Collecting jinja2 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/Jinja2-3.1.2-py3-none-any.whl (133 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 133.1/133.1 kB 365.3 MB/s eta 0:00:00
Collecting fsspec (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/fsspec-2023.4.0-py3-none-any.whl (153 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 154.0/154.0 kB 370.6 MB/s eta 0:00:00
Collecting pytorch-triton==2.1.0+6e4932cda8 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/pytorch_triton-2.1.0%2B6e4932cda8-cp310-cp310-linux_x86_64.whl (125.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 125.4/125.4 MB 384.1 MB/s eta 0:00:00
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.7/23.7 MB 404.9 MB/s eta 0:00:00
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 823.6/823.6 kB 402.5 MB/s eta 0:00:00
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.1/14.1 MB 383.9 MB/s eta 0:00:00
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 731.7/731.7 MB 406.9 MB/s eta 0:00:00
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 410.6/410.6 MB 388.2 MB/s eta 0:00:00
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 121.6/121.6 MB 410.5 MB/s eta 0:00:00
Collecting nvidia-curand-cu12==10.3.2.106 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.5/56.5 MB 272.9 MB/s eta 0:00:00
Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 124.2/124.2 MB 381.5 MB/s eta 0:00:00
Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 196.0/196.0 MB 394.6 MB/s eta 0:00:00
Collecting nvidia-nccl-cu12==2.19.3 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl (166.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 166.0/166.0 MB 384.7 MB/s eta 0:00:00
Collecting nvidia-nvtx-cu12==12.1.105 (from torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 99.1/99.1 kB 281.8 MB/s eta 0:00:00
Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/cu121/nvidia_nvjitlink_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (19.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 19.8/19.8 MB 367.3 MB/s eta 0:00:00
Collecting MarkupSafe>=2.0 (from jinja2->torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25 kB)
Collecting mpmath>=0.19 (from sympy->torch==2.2.0.dev20231121+cu121)
  Downloading https://download.pytorch.org/whl/nightly/mpmath-1.2.1-py3-none-any.whl (532 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 532.6/532.6 kB 391.3 MB/s eta 0:00:00
Installing collected packages: mpmath, typing-extensions, sympy, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, networkx, MarkupSafe, fsspec, filelock, pytorch-triton, nvidia-cusparse-cu12, nvidia-cudnn-cu12, jinja2, nvidia-cusolver-cu12, torch
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/114281
Approved by: https://github.com/malfet, https://github.com/huydhn
2023-11-22 00:10:03 +00:00

345 lines
12 KiB
Python

#!/usr/bin/env python3
"""Generates a matrix to be utilized through github actions
Will output a condensed version of the matrix if on a pull request that only
includes the latest version of python we support built on three different
architectures:
* CPU
* Latest CUDA
* Latest ROCM
"""
from typing import Dict, List, Optional, Tuple
CUDA_ARCHES = ["11.8", "12.1"]
ROCM_ARCHES = ["5.6", "5.7"]
CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]
CPU_AARCH64_ARCH = ["cpu-aarch64"]
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = (
"nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
"nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'"
)
def get_nccl_submodule_version() -> str:
from pathlib import Path
nccl_version_mk = (
Path(__file__).absolute().parent.parent.parent
/ "third_party"
/ "nccl"
/ "nccl"
/ "makefiles"
/ "version.mk"
)
if not nccl_version_mk.exists():
raise RuntimeError(
"Please make sure that nccl submodule is checked out when importing this script"
)
with nccl_version_mk.open("r") as f:
content = f.read()
d = {}
for l in content.split("\n"):
if not l.startswith("NCCL_"):
continue
(k, v) = l.split(":=")
d[k.strip()] = v.strip()
return f"{d['NCCL_MAJOR']}.{d['NCCL_MINOR']}.{d['NCCL_PATCH']}"
def get_nccl_wheel_version() -> str:
import re
requrements = map(str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS))
return [x for x in requrements if x.startswith("nvidia-nccl-cu")][0].split("==")[1]
def validate_nccl_dep_consistency() -> None:
wheel_ver = get_nccl_wheel_version()
submodule_ver = get_nccl_submodule_version()
if wheel_ver != submodule_ver:
raise RuntimeError(
f"NCCL submodule version {submodule_ver} differs from wheel version {wheel_ver}"
)
def arch_type(arch_version: str) -> str:
if arch_version in CUDA_ARCHES:
return "cuda"
elif arch_version in ROCM_ARCHES:
return "rocm"
elif arch_version in CPU_CXX11_ABI_ARCH:
return "cpu-cxx11-abi"
elif arch_version in CPU_AARCH64_ARCH:
return "cpu-aarch64"
else: # arch_version should always be "cpu" in this case
return "cpu"
# This can be updated to the release version when cutting release branch, i.e. 2.1
DEFAULT_TAG = "main"
WHEEL_CONTAINER_IMAGES = {
**{
gpu_arch: f"pytorch/manylinux-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
for gpu_arch in CUDA_ARCHES
},
**{
gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
for gpu_arch in ROCM_ARCHES
},
"cpu": f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
"cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
"cpu-aarch64": f"pytorch/manylinuxaarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
}
CONDA_CONTAINER_IMAGES = {
**{
gpu_arch: f"pytorch/conda-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
for gpu_arch in CUDA_ARCHES
},
"cpu": f"pytorch/conda-builder:cpu-{DEFAULT_TAG}",
}
PRE_CXX11_ABI = "pre-cxx11"
CXX11_ABI = "cxx11-abi"
RELEASE = "release"
DEBUG = "debug"
LIBTORCH_CONTAINER_IMAGES: Dict[Tuple[str, str], str] = {
**{
(
gpu_arch,
PRE_CXX11_ABI,
): f"pytorch/manylinux-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
for gpu_arch in CUDA_ARCHES
},
**{
(
gpu_arch,
CXX11_ABI,
): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
for gpu_arch in CUDA_ARCHES
},
**{
(
gpu_arch,
PRE_CXX11_ABI,
): f"pytorch/manylinux-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
for gpu_arch in ROCM_ARCHES
},
**{
(
gpu_arch,
CXX11_ABI,
): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
for gpu_arch in ROCM_ARCHES
},
("cpu", PRE_CXX11_ABI): f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
("cpu", CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cpu-{DEFAULT_TAG}",
}
FULL_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"]
def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
return {
"cpu": "cpu",
"cpu-aarch64": "cpu",
"cpu-cxx11-abi": "cpu-cxx11-abi",
"cuda": f"cu{gpu_arch_version.replace('.', '')}",
"rocm": f"rocm{gpu_arch_version}",
}.get(gpu_arch_type, gpu_arch_version)
def list_without(in_list: List[str], without: List[str]) -> List[str]:
return [item for item in in_list if item not in without]
def generate_conda_matrix(os: str) -> List[Dict[str, str]]:
ret: List[Dict[str, str]] = []
arches = ["cpu"]
python_versions = FULL_PYTHON_VERSIONS
if os == "linux" or os == "windows":
arches += CUDA_ARCHES
for python_version in python_versions:
# We don't currently build conda packages for rocm
for arch_version in arches:
gpu_arch_type = arch_type(arch_version)
gpu_arch_version = "" if arch_version == "cpu" else arch_version
ret.append(
{
"python_version": python_version,
"gpu_arch_type": gpu_arch_type,
"gpu_arch_version": gpu_arch_version,
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"container_image": CONDA_CONTAINER_IMAGES[arch_version],
"package_type": "conda",
"build_name": f"conda-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
".", "_"
),
}
)
return ret
def generate_libtorch_matrix(
os: str,
abi_version: str,
arches: Optional[List[str]] = None,
libtorch_variants: Optional[List[str]] = None,
) -> List[Dict[str, str]]:
if arches is None:
arches = ["cpu"]
if os == "linux":
arches += CUDA_ARCHES
arches += ROCM_ARCHES
elif os == "windows":
arches += CUDA_ARCHES
if libtorch_variants is None:
libtorch_variants = [
"shared-with-deps",
"shared-without-deps",
"static-with-deps",
"static-without-deps",
]
ret: List[Dict[str, str]] = []
for arch_version in arches:
for libtorch_variant in libtorch_variants:
# one of the values in the following list must be exactly
# CXX11_ABI, but the precise value of the other one doesn't
# matter
gpu_arch_type = arch_type(arch_version)
gpu_arch_version = "" if arch_version == "cpu" else arch_version
# ROCm builds without-deps failed even in ROCm runners; skip for now
if gpu_arch_type == "rocm" and "without-deps" in libtorch_variant:
continue
ret.append(
{
"gpu_arch_type": gpu_arch_type,
"gpu_arch_version": gpu_arch_version,
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"libtorch_variant": libtorch_variant,
"libtorch_config": abi_version if os == "windows" else "",
"devtoolset": abi_version if os != "windows" else "",
"container_image": LIBTORCH_CONTAINER_IMAGES[
(arch_version, abi_version)
]
if os != "windows"
else "",
"package_type": "libtorch",
"build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{abi_version}".replace(
".", "_"
),
}
)
return ret
def generate_wheels_matrix(
os: str,
arches: Optional[List[str]] = None,
python_versions: Optional[List[str]] = None,
) -> List[Dict[str, str]]:
package_type = "wheel"
if os == "linux" or os == "linux-aarch64":
# NOTE: We only build manywheel packages for x86_64 and aarch64 linux
package_type = "manywheel"
if python_versions is None:
python_versions = FULL_PYTHON_VERSIONS + ["3.12"]
if arches is None:
# Define default compute archivectures
arches = ["cpu"]
if os == "linux":
arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES
elif os == "windows":
arches += CUDA_ARCHES
elif os == "linux-aarch64":
# Only want the one arch as the CPU type is different and
# uses different build/test scripts
arches = ["cpu-aarch64"]
ret: List[Dict[str, str]] = []
for python_version in python_versions:
for arch_version in arches:
gpu_arch_type = arch_type(arch_version)
gpu_arch_version = (
""
if arch_version == "cpu"
or arch_version == "cpu-cxx11-abi"
or arch_version == "cpu-aarch64"
else arch_version
)
# 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
if arch_version == "12.1" and os == "linux":
ret.append(
{
"python_version": python_version,
"gpu_arch_type": gpu_arch_type,
"gpu_arch_version": gpu_arch_version,
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"devtoolset": "",
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS,
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace( # noqa: B950
".", "_"
),
}
)
else:
ret.append(
{
"python_version": python_version,
"gpu_arch_type": gpu_arch_type,
"gpu_arch_version": gpu_arch_version,
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"devtoolset": "cxx11-abi"
if arch_version == "cpu-cxx11-abi"
else "",
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
".", "_"
),
"pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS
if os != "linux"
else "",
}
)
return ret
validate_nccl_dep_consistency()