diff --git a/.ci/aarch64_linux/aarch64_ci_build.sh b/.ci/aarch64_linux/aarch64_ci_build.sh
index 178db42a609..a0eb0b72df2 100644
--- a/.ci/aarch64_linux/aarch64_ci_build.sh
+++ b/.ci/aarch64_linux/aarch64_ci_build.sh
@@ -31,8 +31,7 @@ pip install -r /pytorch/requirements.txt
 pip install auditwheel==6.2.0 wheel
 if [ "$DESIRED_CUDA" = "cpu" ]; then
     echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
-    #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
-    USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
+    python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
 else
     echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
     export USE_SYSTEM_NCCL=1
@@ -46,6 +45,5 @@ else
         export USE_NVIDIA_PYPI_LIBS=1
     fi
 
-    #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
-    USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
+    python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
 fi
diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
index 1b6429fa8c0..d4afea81ac0 100755
--- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
@@ -317,7 +317,7 @@ if __name__ == "__main__":
     ).decode()
 
     print("Building PyTorch wheel")
-    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = ""
     # MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
     if enable_cuda:
         build_vars += "MAX_JOBS=5 "
diff --git a/.gitignore b/.gitignore
index 2dd40f8cfa8..ca87f1306e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -259,6 +259,9 @@ gen
 .pytest_cache
 aten/build/*
 
+# Linker scripts for prioritized text optimization
+cmake/linker_script.ld
+
 # Bram
 plsdontbreak
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4fba0eea881..8323f310fec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -380,6 +380,13 @@ cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler"
                        OFF "USE_CUDA" OFF)
 cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON
                         "CPU_AARCH64" OFF)
+# Prioritized-text linker script: ON by default on AArch64 Linux; the option itself is available on AArch64, x86 and ppc64le.
+set(USE_PRIORITIZED_TEXT_DEFAULT OFF)
+if(LINUX AND CPU_AARCH64)
+  set(USE_PRIORITIZED_TEXT_DEFAULT ON)
+endif()
+cmake_dependent_option(USE_PRIORITIZED_TEXT_FOR_LD "Use prioritized text linker for ld."
+  "${USE_PRIORITIZED_TEXT_DEFAULT}" "CPU_INTEL OR CPU_AARCH64 OR CPU_POWER" OFF)
 
 option(USE_MIMALLOC "Use mimalloc" OFF)
 # Enable third party mimalloc library to improve memory allocation performance
@@ -657,6 +664,11 @@ endif(MSVC)
 
 string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
 
+# Set linker max-page-size to 64KiB on AArch64 Linux
+if(LINUX AND CPU_AARCH64)
+  add_link_options_if_supported("-z,max-page-size=0x10000")
+endif()
+
 # Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
 # applicable to mobile are disabled by this variable. Setting
 # `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can force it
@@ -1421,3 +1433,69 @@ if(BUILD_BUNDLE_PTXAS AND USE_CUDA)
   install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas"
           DESTINATION "${CMAKE_INSTALL_BINDIR}")
 endif()
+
+# Prioritized-text linking: generate a linker script from
+# cmake/prioritized_text.txt at build time and pass it (-T) to the main torch
+# libraries, which are compiled with one section per function/data object.
+if(USE_PRIORITIZED_TEXT_FOR_LD)
+  set(LINKER_SCRIPT_FILE_OUT "${CMAKE_SOURCE_DIR}/cmake/linker_script.ld")
+  set(LINKER_SCRIPT_FILE_IN "${CMAKE_SOURCE_DIR}/cmake/prioritized_text.txt")
+
+  # Build rule: (re)generate the script whenever the generator or its input changes.
+  add_custom_command(
+    OUTPUT "${LINKER_SCRIPT_FILE_OUT}"
+    COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py --filein "${LINKER_SCRIPT_FILE_IN}" --fout "${LINKER_SCRIPT_FILE_OUT}"
+    DEPENDS ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py "${LINKER_SCRIPT_FILE_IN}"
+    COMMENT "Generating prioritized text linker files"
+    VERBATIM
+  )
+
+  add_custom_target(generate_linker_script DEPENDS "${LINKER_SCRIPT_FILE_OUT}")
+
+  # Start from an empty list so an inherited value of the same name cannot leak in.
+  set(LINKER_OPT_TARGETS "")
+  if(BUILD_PYTHON)
+    list(APPEND LINKER_OPT_TARGETS torch_python)
+  endif()
+
+  if(NOT BUILD_LIBTORCHLESS)
+    list(APPEND LINKER_OPT_TARGETS torch_cpu c10)
+    if(USE_CUDA)
+      list(APPEND LINKER_OPT_TARGETS torch_cuda c10_cuda)
+    endif()
+    if(USE_XPU)
+      list(APPEND LINKER_OPT_TARGETS torch_xpu c10_xpu)
+    endif()
+    if(USE_ROCM)
+      list(APPEND LINKER_OPT_TARGETS torch_hip c10_hip)
+    endif()
+  endif()
+
+  foreach(tgt IN LISTS LINKER_OPT_TARGETS)
+    if(TARGET ${tgt})
+      # The targets already exist at this point, so directory-scope
+      # add_compile_options() would no longer reach them; set the section
+      # flags on each target directly.
+      target_compile_options(${tgt} PRIVATE
+        $<$<COMPILE_LANGUAGE:C,CXX>:-ffunction-sections>
+        $<$<COMPILE_LANGUAGE:C,CXX>:-fdata-sections>
+      )
+      add_dependencies("${tgt}" generate_linker_script)
+      # NOTE(review): this helper probes the flag at configure time, when the
+      # generated script may not exist yet - confirm the probe result is reliable.
+      target_link_options_if_supported(${tgt} "-T,${LINKER_SCRIPT_FILE_OUT}")
+      # Relink when the script is regenerated.
+      set_property(TARGET ${tgt} APPEND PROPERTY LINK_DEPENDS "${LINKER_SCRIPT_FILE_OUT}")
+    else()
+      message(WARNING "Requested target '${tgt}' for linker script optimization was not found.")
+    endif()
+  endforeach()
+
+else()
+  if(LINUX AND CPU_AARCH64)
+    message(WARNING [[
+    It is strongly recommend to enable linker script optimization for all AArch64 Linux builds.
+    To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1
+    ]])
+  endif()
+endif()
diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
index fb64e99bccf..a0bfb22bed8 100644
--- a/cmake/Summary.cmake
+++ b/cmake/Summary.cmake
@@ -158,6 +158,7 @@ function(caffe2_print_configuration_summary)
   if(${USE_KLEIDIAI})
     message(STATUS "  USE_KLEIDIAI          : ${USE_KLEIDIAI}")
   endif()
+  message(STATUS "  USE_PRIORITIZED_TEXT_FOR_LD : ${USE_PRIORITIZED_TEXT_FOR_LD}")
   message(STATUS "  USE_UCC               : ${USE_UCC}")
   if(${USE_UCC})
     message(STATUS "    USE_SYSTEM_UCC        : ${USE_SYSTEM_UCC}")
diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake
index 68e66bb3fc3..c96ffebf858 100644
--- a/cmake/public/utils.cmake
+++ b/cmake/public/utils.cmake
@@ -482,6 +482,7 @@ function(torch_update_find_cuda_flags)
 endfunction()
 
 include(CheckCXXCompilerFlag)
+include(CheckLinkerFlag)
 
 ##############################################################################
 # CHeck if given flag is supported and append it to provided outputvar
@@ -511,3 +512,33 @@ function(target_compile_options_if_supported target flag)
     target_compile_options(${target} PRIVATE ${flag})
   endif()
 endfunction()
+
+# Add a directory-scope link option, but only if the linker accepts it.
+#   flag - option without the "LINKER:" prefix, comma-separated as that prefix
+#          expects, e.g. "-z,max-page-size=0x10000".
+# Emits a WARNING and adds nothing when the probe fails.
+# NOTE(review): check_linker_flag() keeps its result in a cache entry named after
+# the result variable; every call here uses "_supported", so a probe of a second,
+# different flag may reuse the first answer - confirm whether a per-flag result
+# variable is needed.
+function(add_link_options_if_supported flag)
+  check_linker_flag(C "LINKER:${flag}" _supported)
+  if("${_supported}")
+    add_link_options("LINKER:${flag}")
+  else()
+    message(WARNING "Attempted to use unsupported link option : ${flag}.")
+  endif()
+endfunction()
+
+# Add a PRIVATE link option to target `tgt`, but only if the linker accepts it.
+#   tgt  - name of an existing target.
+#   flag - option without the "LINKER:" prefix (see add_link_options_if_supported).
+# Emits a WARNING and adds nothing when the probe fails.
+function(target_link_options_if_supported tgt flag)
+  check_linker_flag(C "LINKER:${flag}" _supported)
+  if("${_supported}")
+    target_link_options("${tgt}" PRIVATE "LINKER:${flag}")
+  else()
+    message(WARNING "Attempted to use unsupported link option : ${flag}.")
+  endif()
+endfunction()
diff --git a/setup.py b/setup.py
index c0523a1b5c6..2bb63a93cec 100644
--- a/setup.py
+++ b/setup.py
@@ -227,9 +227,6 @@
 #      Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free.
 #      By default, It is only enabled on Windows.
 #
-#   USE_PRIORITIZED_TEXT_FOR_LD
-#      Uses prioritized text form cmake/prioritized_text.txt for LD
-#
 #   BUILD_LIBTORCH_WHL
 #      Builds libtorch.so and its dependencies as a wheel
 #
@@ -323,7 +320,6 @@ from tools.setup_helpers.env import (
     IS_LINUX,
     IS_WINDOWS,
 )
-from tools.setup_helpers.generate_linker_script import gen_linker_script
 
 
 def str2bool(value: str | None) -> bool:
@@ -1627,26 +1623,6 @@ def main() -> None:
     if BUILD_PYTHON_ONLY:
         install_requires += [f"{LIBTORCH_PKG_NAME}=={TORCH_VERSION}"]
 
-    if str2bool(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD")):
-        gen_linker_script(
-            filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld"
-        )
-        linker_script_path = os.path.abspath("cmake/linker_script.ld")
-        os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}"
-        os.environ["CFLAGS"] = (
-            os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections"
-        )
-        os.environ["CXXFLAGS"] = (
-            os.getenv("CXXFLAGS", "") + " -ffunction-sections -fdata-sections"
-        )
-    elif platform.system() == "Linux" and platform.processor() == "aarch64":
-        print_box(
-            """
-            WARNING: we strongly recommend enabling linker script optimization for ARM + CUDA.
-            To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1
-            """
-        )
-
     # Parse the command line and check the arguments before we proceed with
     # building deps and setup. We need to set values so `--help` works.
     dist = Distribution()
diff --git a/tools/setup_helpers/generate_linker_script.py b/tools/setup_helpers/generate_linker_script.py
index e66fc197062..b5a7a4ce7de 100644
--- a/tools/setup_helpers/generate_linker_script.py
+++ b/tools/setup_helpers/generate_linker_script.py
@@ -1,5 +1,7 @@
+import argparse
 import os
 import subprocess
+from pathlib import Path
 
 
 def gen_linker_script(
@@ -28,6 +30,10 @@ def gen_linker_script(
     assert len(text_line_start) == 1, "The linker script has multiple text sections!"
     text_line_start = text_line_start[0]
 
+    # ensure that parent directory exists before writing
+    fout = Path(fout)
+    fout.parent.mkdir(parents=True, exist_ok=True)
+
     with open(fout, "w") as f:
         for lineid, line in enumerate(linker_script_lines):
             if lineid == text_line_start + 2:
@@ -36,3 +42,20 @@ def gen_linker_script(
                     f.write(f"      .text.{plines}\n")
                 f.write("    )\n")
             f.write(f"{line}\n")
+
+
+if __name__ == "__main__":
+    # Command-line entry point (invoked as a script rather than imported).
+    cli = argparse.ArgumentParser(
+        description="Generate linker file based on prioritized symbols. Used for link-time optimization.",
+    )
+    # argparse.SUPPRESS leaves an omitted flag out of the parsed namespace, so
+    # only flags actually given on the command line are forwarded below.
+    for flag, flag_help in (
+        ("--filein", "Path to prioritized_text.txt input file"),
+        ("--fout", "Output path for linker ld file"),
+    ):
+        cli.add_argument(flag, help=flag_help, default=argparse.SUPPRESS)
+    # Expand the parsed namespace into keyword arguments (filein=..., fout=...).
+    given = vars(cli.parse_args())
+    gen_linker_script(**given)