From 493bd625e252dea02e871346beaa49745b4b2663 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Thu, 10 Jul 2025 16:14:06 +0000 Subject: [PATCH] Revert "[BE]: Reduce binary size 40% using aggressive fatbin compression. (#157791)" This reverts commit 9bdf87e8918b9a3f78d7bcb8a770c19f7c82ac15. Reverted https://github.com/pytorch/pytorch/pull/157791 on behalf of https://github.com/albanD due to Reverting to avoid regressing on the driver supported ([comment](https://github.com/pytorch/pytorch/pull/157791#issuecomment-3058091176)) --- .ci/docker/ubuntu/Dockerfile | 2 +- .ci/manywheel/build_cuda.sh | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile index 948598fb6b4..27c466dd8d4 100644 --- a/.ci/docker/ubuntu/Dockerfile +++ b/.ci/docker/ubuntu/Dockerfile @@ -181,7 +181,7 @@ RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm; # AWS specific CUDA build guidance ENV TORCH_CUDA_ARCH_LIST Maxwell -ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all -compress-mode=size" +ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all" ENV CUDA_PATH /usr/local/cuda USER jenkins diff --git a/.ci/manywheel/build_cuda.sh b/.ci/manywheel/build_cuda.sh index 0abeaf1e6e2..39586faa85f 100644 --- a/.ci/manywheel/build_cuda.sh +++ b/.ci/manywheel/build_cuda.sh @@ -4,7 +4,7 @@ set -ex SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P ))" -export TORCH_NVCC_FLAGS="-Xfatbin -compress-all -compress-mode=size" +export TORCH_NVCC_FLAGS="-Xfatbin -compress-all" export NCCL_ROOT_DIR=/usr/local/cuda export TH_BINARY_BUILD=1 export USE_STATIC_CUDNN=1 @@ -57,14 +57,16 @@ case ${CUDA_VERSION} in #removing sm_50-sm_60 as these architectures are deprecated in CUDA 12.8/9 and will be removed in future releases #however we would like to keep sm_70 architecture see: https://github.com/pytorch/pytorch/issues/157517 12.8) - TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0+PTX" + TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0" ;; 12.9) TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0+PTX" + # WAR to resolve the ld error in libtorch build with CUDA 12.9 + if [[ "$PACKAGE_TYPE" == "libtorch" ]]; then + TORCH_CUDA_ARCH_LIST="7.5;8.0;9.0;10.0;12.0+PTX" + fi ;; 12.6) - # CUDA 12.6 seems to have a bug which prevents aggressive compression here - export TORCH_NVCC_FLAGS="${TORCH_NVCC_FLAGS} --compress-mode=default" TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0" ;; *) @@ -112,7 +114,7 @@ DEPS_SONAME=( if [[ $CUDA_VERSION == 12* ]]; then export USE_STATIC_CUDNN=0 # Try parallelizing nvcc as well - export TORCH_NVCC_FLAGS="${TORCH_NVCC_FLAGS} --threads 2" + export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then echo "Bundling with cudnn and cublas." DEPS_LIST+=(