Restore AcceleratorAllocatorConfig to avoid potential regression (#165129)

# Motivation
This PR aims to restore `AcceleratorAllocatorConfig` to avoid the potential regression mentioned in https://github.com/pytorch/pytorch/pull/160666#issue-3323270375
These code change would be reverted in the following PR https://github.com/pytorch/pytorch/pull/165304
Pull Request resolved: https://github.com/pytorch/pytorch/pull/165129
Approved by: https://github.com/albanD
This commit is contained in:
Yu, Guangye 2025-10-14 13:29:29 +00:00 committed by PyTorch MergeBot
parent e6d9d68598
commit 7ee45f7503
2 changed files with 19 additions and 18 deletions

View File

@ -13,20 +13,22 @@ constexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB
AcceleratorAllocatorConfig& AcceleratorAllocatorConfig::instance() { AcceleratorAllocatorConfig& AcceleratorAllocatorConfig::instance() {
static AcceleratorAllocatorConfig instance; static AcceleratorAllocatorConfig instance;
#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated) \ #define C10_ALLOCATOR_CONFIG_PARSE_ENV(env) \
auto env##_name = c10::utils::get_env(#env); \ auto env##_name = c10::utils::get_env(#env); \
if (env##_name.has_value()) { \ if (env##_name.has_value()) { \
if (deprecated) { \ instance.parseArgs(env##_name.value()); \
TORCH_WARN_ONCE(#env " is deprecated, use PYTORCH_ALLOC_CONF instead"); \ return true; \
} \
instance.parseArgs(env##_name.value()); \
return true; \
} }
static bool env_flag [[maybe_unused]] = []() { static bool env_flag [[maybe_unused]] = []() {
C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_ALLOC_CONF, false) // Parse allocator configuration from environment variables.
// Keep this for backwards compatibility // The first two entries are kept for backward compatibility with legacy
C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_CUDA_ALLOC_CONF, /*deprecated=*/true) // CUDA and HIP environment variable names. The new unified variable
C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_HIP_ALLOC_CONF, /*deprecated=*/true) // (PYTORCH_ALLOC_CONF) should be used going forward.
// Note: keep the parsing order and logic stable to avoid potential
// performance regressions in internal tests.
C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_CUDA_ALLOC_CONF)
C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_HIP_ALLOC_CONF)
C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_ALLOC_CONF)
return false; return false;
}(); }();
#undef C10_ALLOCATOR_CONFIG_PARSE_ENV #undef C10_ALLOCATOR_CONFIG_PARSE_ENV
@ -127,8 +129,7 @@ size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions(
std::fill( std::fill(
std::next( std::next(
roundup_power2_divisions_.begin(), roundup_power2_divisions_.begin(),
static_cast<std::vector<size_t>::difference_type>( static_cast<std::vector<size_t>::difference_type>(last_index)),
last_index + 1)),
roundup_power2_divisions_.end(), roundup_power2_divisions_.end(),
value); value);
} else { } else {

View File

@ -67,8 +67,8 @@ TEST(AllocatorConfigTest, allocator_config_test) {
EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(128 * kMB), 2); EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(128 * kMB), 2);
EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(256 * kMB), 4); EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(256 * kMB), 4);
EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(512 * kMB), 2); EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(512 * kMB), 2);
EXPECT_EQ( // EXPECT_EQ(
AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 4); // AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 4);
EXPECT_EQ( EXPECT_EQ(
AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 1); AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 1);
EXPECT_EQ( EXPECT_EQ(
@ -101,8 +101,8 @@ TEST(AllocatorConfigTest, allocator_config_test) {
EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(512 * kMB), 1); EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(512 * kMB), 1);
EXPECT_EQ( EXPECT_EQ(
AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 0); AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 0);
EXPECT_EQ( // EXPECT_EQ(
AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 8); // AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 8);
EXPECT_EQ( EXPECT_EQ(
AcceleratorAllocatorConfig::roundup_power2_divisions(4096 * kMB), 2); AcceleratorAllocatorConfig::roundup_power2_divisions(4096 * kMB), 2);