Replace insert with std::rotate_copy for RingBuffer (#165348)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165348
Approved by: https://github.com/eqy, https://github.com/Skylion007
2025-12-06 12:20:52 +01:00 · 2025-10-14 05:11:25 +00:00 · 2025-10-14 05:11:25 +00:00 · 496adf9f9c
commit 496adf9f9c
parent 33bfec27ff
1 changed files with 7 additions and 19 deletions
--- a/c10/cuda/CUDACachingAllocator.cpp
+++ b/c10/cuda/CUDACachingAllocator.cpp
@@ -1080,19 +1080,12 @@ class RingBuffer {
  void getEntries(std::vector<T>& result) const {
    std::lock_guard<std::mutex> lk(alloc_trace_lock);
-    result.reserve(alloc_trace->size());
-    result.insert(
-        result.end(),
-        alloc_trace->begin() +
-            static_cast<typename std::vector<T>::difference_type>(
-                alloc_trace_next),
-        alloc_trace->end());
-    result.insert(
-        result.end(),
+    result.reserve(result.size() + alloc_trace->size());
+    std::rotate_copy(
        alloc_trace->begin(),
-        alloc_trace->begin() +
-            static_cast<typename std::vector<T>::difference_type>(
-                alloc_trace_next));
+        std::next(alloc_trace->begin(), alloc_trace_next),
+        alloc_trace->end(),
+        std::back_inserter(result));
  }
  void clear() {
@@ -4466,10 +4459,7 @@ struct BackendStaticInitializer {
          if (kv[0] == "backend") {
 #ifdef USE_ROCM
            // convenience for ROCm users to allow either CUDA or HIP env var
-            if (kv[1] ==
-                    "cud"
-                    "aMallocAsync" ||
-                kv[1] == "hipMallocAsync")
+            if (kv[1] == "cudaMallocAsync" || kv[1] == "hipMallocAsync")
 #else
            if (kv[1] == "cudaMallocAsync")
 #endif
@@ -4491,9 +4481,7 @@ struct BackendStaticInitializer {
 // HIPAllocatorMasqueradingAsCUDA because it needs to happen during static
 // initialization, and doing so there may introduce static initialization
 // order (SIOF) issues.
-#define HIP_MASQUERADING_AS_CUDA \
-  "cud"                          \
-  "a"
+#define HIP_MASQUERADING_AS_CUDA "cuda"
    at::SetAllocator(c10::Device(HIP_MASQUERADING_AS_CUDA).type(), r, 0);
    allocator.store(r);
 #undef HIP_MASQUERADING_AS_CUDA