mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[pytorch/aten] Avoid temporary array reconstruction (#72391)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/72391
Temporary array can be reused within the loop; this will save memory reallocations and uninitialized_copy calls for the vector
Test Plan: CI
Reviewed By: jspark1105
Differential Revision: D34030993
fbshipit-source-id: 40708e3144c6c8f8ac3a6a45d668b34b5e52e095
(cherry picked from commit 859e126aef)
This commit is contained in:
parent
72cedba655
commit
9ab71f5ac8
|
|
@ -104,6 +104,9 @@ void topk_impl_loop(
|
|||
const bool largest,
|
||||
const bool sorted,
|
||||
char** data, const int64_t* strides, const int64_t n) {
|
||||
|
||||
using elem_t = std::pair<accscalar_t, int64_t>;
|
||||
std::vector<elem_t> queue(dim_size);
|
||||
for (const auto i : c10::irange(n)) {
|
||||
TensorAccessor<scalar_t, 1> mode_values(
|
||||
reinterpret_cast<scalar_t*>(data[0] + i * strides[0]),
|
||||
|
|
@ -118,8 +121,6 @@ void topk_impl_loop(
|
|||
auto n = dim_size;
|
||||
auto use_partial_sort = k * 64 <= n;
|
||||
|
||||
using elem_t = std::pair<accscalar_t, int64_t>;
|
||||
std::vector<elem_t> queue(n);
|
||||
for (const auto j : c10::irange(n)) {
|
||||
queue[j].first = tmp_values[j];
|
||||
queue[j].second = j;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user