From 493bd625e252dea02e871346beaa49745b4b2663 Mon Sep 17 00:00:00 2001
From: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>
Date: Thu, 10 Jul 2025 16:14:06 +0000
Subject: [PATCH] Revert "[BE]: Reduce binary size 40% using aggressive fatbin
 compression. (#157791)"

This reverts commit 9bdf87e8918b9a3f78d7bcb8a770c19f7c82ac15.

Reverted https://github.com/pytorch/pytorch/pull/157791 on behalf of https://github.com/albanD, reverting to avoid regressing the range of supported drivers ([comment](https://github.com/pytorch/pytorch/pull/157791#issuecomment-3058091176))
---
 .ci/docker/ubuntu/Dockerfile |  2 +-
 .ci/manywheel/build_cuda.sh  | 12 +++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile
index 948598fb6b4..27c466dd8d4 100644
--- a/.ci/docker/ubuntu/Dockerfile
+++ b/.ci/docker/ubuntu/Dockerfile
@@ -181,7 +181,7 @@ RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm;
 
 # AWS specific CUDA build guidance
 ENV TORCH_CUDA_ARCH_LIST Maxwell
-ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all -compress-mode=size"
+ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
 ENV CUDA_PATH /usr/local/cuda
 
 USER jenkins
diff --git a/.ci/manywheel/build_cuda.sh b/.ci/manywheel/build_cuda.sh
index 0abeaf1e6e2..39586faa85f 100644
--- a/.ci/manywheel/build_cuda.sh
+++ b/.ci/manywheel/build_cuda.sh
@@ -4,7 +4,7 @@ set -ex
 
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P ))"
 
-export TORCH_NVCC_FLAGS="-Xfatbin -compress-all -compress-mode=size"
+export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
 export NCCL_ROOT_DIR=/usr/local/cuda
 export TH_BINARY_BUILD=1
 export USE_STATIC_CUDNN=1
@@ -57,14 +57,16 @@ case ${CUDA_VERSION} in
     #removing sm_50-sm_60 as these architectures are deprecated in CUDA 12.8/9 and will be removed in future releases
     #however we would like to keep sm_70 architecture see: https://github.com/pytorch/pytorch/issues/157517
     12.8)
-        TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0+PTX"
+        TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0"
         ;;
     12.9)
         TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0+PTX"
+        # WAR to resolve the ld error in libtorch build with CUDA 12.9
+        if [[ "$PACKAGE_TYPE" == "libtorch" ]]; then
+            TORCH_CUDA_ARCH_LIST="7.5;8.0;9.0;10.0;12.0+PTX"
+        fi
         ;;
     12.6)
-        # CUDA 12.6 seems to have a bug which prevents aggressive compression here
-        export TORCH_NVCC_FLAGS="${TORCH_NVCC_FLAGS} --compress-mode=default"
         TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0"
         ;;
     *)
@@ -112,7 +114,7 @@ DEPS_SONAME=(
 if [[ $CUDA_VERSION == 12* ]]; then
     export USE_STATIC_CUDNN=0
     # Try parallelizing nvcc as well
-    export TORCH_NVCC_FLAGS="${TORCH_NVCC_FLAGS} --threads 2"
+    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
     if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
         echo "Bundling with cudnn and cublas."
         DEPS_LIST+=(