[dims] Fix large array inputs (#88596)
Variable-length arguments can overflow the arena that is used to keep overhead low for torch dims. If we hit this case, the amount of work being done is already relatively large, so we simply fall back to standard memory allocation.

Fixes #88586

Pull Request resolved: https://github.com/pytorch/pytorch/pull/88596
Approved by: https://github.com/ezyang
This commit is contained in:
parent bb9fc32fe0
commit 0457020d2c
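The change below is a bump-pointer arena with a heap fallback: requests are normally carved out of a fixed buffer by advancing an offset, and once the buffer would be exceeded the allocator falls back to an ordinary heap allocation that the arena still owns and frees. The following is a minimal, self-contained sketch of that pattern for illustration only; MiniArena, the MAX_SIZE and ALIGNMENT values, and the main() driver are invented for this example and are not the functorch implementation (which also uses C10_UNLIKELY_OR_CONST to hint that the overflow branch is rare).

#include <cstddef>
#include <cstdio>
#include <memory>
#include <vector>

// Illustrative values only; the real arena's capacity and alignment may differ.
constexpr std::size_t MAX_SIZE = 4096;
constexpr std::size_t ALIGNMENT = 8;

struct MiniArena {
    template <typename T>
    T* allocate(std::size_t n) {
        if (n == 0) {
            return nullptr;
        }
        std::size_t to_allocate = sizeof(T) * n;
        // Round the bump-pointer advance up to the next alignment boundary.
        std::size_t rounded = ALIGNMENT * ((to_allocate - 1) / ALIGNMENT + 1);
        std::size_t prev_allocated = allocated_;
        allocated_ += rounded;
        if (allocated_ > MAX_SIZE) {
            // Buffer exhausted: fall back to a plain heap allocation that the
            // arena still owns, so it is released when the arena is destroyed.
            overflow_.emplace_back(new char[to_allocate]);
            return reinterpret_cast<T*>(overflow_.back().get());
        }
        return reinterpret_cast<T*>(buffer_ + prev_allocated);
    }

private:
    alignas(ALIGNMENT) char buffer_[MAX_SIZE];
    std::size_t allocated_ = 0;
    std::vector<std::unique_ptr<char[]>> overflow_;
};

int main() {
    MiniArena arena;
    int* small = arena.allocate<int>(16);      // fits: served from the buffer
    int* large = arena.allocate<int>(100000);  // too big: heap fallback
    small[0] = 1;
    large[99999] = 2;
    std::printf("%d %d\n", small[0], large[99999]);
    return 0;
}

Because allocated_ keeps advancing even when a request spills to the heap, every later allocation from the same arena also takes the fallback path once the cap is crossed, and the vector of unique_ptr<char[]> releases all overflow blocks when the arena itself is destroyed. The same behavior can be read off the real allocate() in the first hunk below.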
@@ -222,10 +222,13 @@ struct Arena {
         }
         int to_allocate = sizeof(T)*n;
         int to_allocate_rounded = ALIGNMENT * ((to_allocate - 1) / ALIGNMENT + 1);
-        T* result = (T*) &buffer_[allocated_];
+        auto prev_allocated = allocated_;
         allocated_ += to_allocate_rounded;
-        AT_ASSERT(allocated_ <= ARENA_MAX_SIZE);
-        return result;
+        if (C10_UNLIKELY_OR_CONST(allocated_ > ARENA_MAX_SIZE)) {
+            overflow_.emplace_back(new char[to_allocate]);
+            return (T*) &overflow_.back()[0];
+        }
+        return (T*) (buffer_ + prev_allocated);
     }
     TensorRef autorelease(at::Tensor s) {
         auto ref = TensorRef(s);
@@ -251,6 +254,7 @@ private:
     char buffer_[ARENA_MAX_SIZE];
     Slice<TensorRef> ar_tensors_;
     Slice<py::handle> ar_objects_;
+    std::vector<std::unique_ptr<char[]>> overflow_;
 };

 template<typename T>
@@ -602,7 +602,14 @@ class TestMin(TestCase):
         t = torch.rand(3)[d]
         self.assertRaises(TypeError, lambda: t.order(wrong=3))

+    def test_big_split(self):
+        total = 0
+        l = []
+        while total < 6400:
+            l.append(torch.randint(2, 10, (1,)).item())
+            total += l[-1]
+        x = torch.randn(total, 1)
+        x.split(l, 0)

 skip_functorch_only = ['test_time_mm_fuse', 'test_attn_cuda']

 class TestMinFunctorchOnly(TestMin):