Restore AcceleratorAllocatorConfig to avoid potential regression (#165129)

# Motivation This PR aims to restore `AcceleratorAllocatorConfig` to avoid the potential regression mentioned in https://github.com/pytorch/pytorch/pull/160666#issue-3323270375 These code change would be reverted in the following PR https://github.com/pytorch/pytorch/pull/165304 Pull Request resolved: https://github.com/pytorch/pytorch/pull/165129 Approved by: https://github.com/albanD
2025-12-06 00:20:18 +01:00 · 2025-10-14 13:29:29 +00:00 · 2025-10-14 13:29:29 +00:00 · 7ee45f7503
commit 7ee45f7503
parent e6d9d68598
2 changed files with 19 additions and 18 deletions
--- a/c10/core/AllocatorConfig.cpp
+++ b/c10/core/AllocatorConfig.cpp
@ -13,20 +13,22 @@ constexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB

 AcceleratorAllocatorConfig& AcceleratorAllocatorConfig::instance() {
  static AcceleratorAllocatorConfig instance;
-#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated)                       \
-  auto env##_name = c10::utils::get_env(#env);                                \
-  if (env##_name.has_value()) {                                               \
-    if (deprecated) {                                                         \
-      TORCH_WARN_ONCE(#env " is deprecated, use PYTORCH_ALLOC_CONF instead"); \
-    }                                                                         \
-    instance.parseArgs(env##_name.value());                                   \
-    return true;                                                              \
+#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env)    \
+  auto env##_name = c10::utils::get_env(#env); \
+  if (env##_name.has_value()) {                \
+    instance.parseArgs(env##_name.value());    \
+    return true;                               \
  }
  static bool env_flag [[maybe_unused]] = []() {
-    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_ALLOC_CONF, false)
-    // Keep this for backwards compatibility
-    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_CUDA_ALLOC_CONF, /*deprecated=*/true)
-    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_HIP_ALLOC_CONF, /*deprecated=*/true)
+    // Parse allocator configuration from environment variables.
+    // The first two entries are kept for backward compatibility with legacy
+    // CUDA and HIP environment variable names. The new unified variable
+    // (PYTORCH_ALLOC_CONF) should be used going forward.
+    // Note: keep the parsing order and logic stable to avoid potential
+    // performance regressions in internal tests.
+    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_CUDA_ALLOC_CONF)
+    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_HIP_ALLOC_CONF)
+    C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_ALLOC_CONF)
    return false;
  }();
 #undef C10_ALLOCATOR_CONFIG_PARSE_ENV
@ -127,8 +129,7 @@ size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions(
        std::fill(
            std::next(
                roundup_power2_divisions_.begin(),
-                static_cast<std::vector<size_t>::difference_type>(
-                    last_index + 1)),
+                static_cast<std::vector<size_t>::difference_type>(last_index)),
            roundup_power2_divisions_.end(),
            value);
      } else {
--- a/c10/test/core/AllocatorConfig_test.cpp
+++ b/c10/test/core/AllocatorConfig_test.cpp
@ -67,8 +67,8 @@ TEST(AllocatorConfigTest, allocator_config_test) {
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(128 * kMB), 2);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(256 * kMB), 4);
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(512 * kMB), 2);
-  EXPECT_EQ(
-      AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 4);
+  // EXPECT_EQ(
+  //     AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 4);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 1);
  EXPECT_EQ(
@ -101,8 +101,8 @@ TEST(AllocatorConfigTest, allocator_config_test) {
  EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(512 * kMB), 1);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 0);
-  EXPECT_EQ(
-      AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 8);
+  // EXPECT_EQ(
+  //     AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 8);
  EXPECT_EQ(
      AcceleratorAllocatorConfig::roundup_power2_divisions(4096 * kMB), 2);