[dims] Fix large array inputs (#88596)
Variable-length arguments can overflow the arena that is used to keep overhead low for torch dims. If we hit this case, the amount of work being done is already relatively large, so we simply fall back to standard memory allocation.

Fixes #88586

Pull Request resolved: https://github.com/pytorch/pytorch/pull/88596
Approved by: https://github.com/ezyang
This commit is contained in:
parent bb9fc32fe0
commit 0457020d2c
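The change below is a bump-pointer arena with a heap fallback: requests are normally carved out of a fixed buffer by advancing an offset, and once the buffer would be exceeded the allocator falls back to an ordinary heap allocation that the arena still owns and frees. The following is a minimal, self-contained sketch of that pattern for illustration only; MiniArena, the MAX_SIZE and ALIGNMENT values, and the main() driver are invented for this example and are not the functorch implementation (which also uses C10_UNLIKELY_OR_CONST to hint that the overflow branch is rare).

#include <cstddef>
#include <cstdio>
#include <memory>
#include <vector>

// Illustrative values only; the real arena's capacity and alignment may differ.
constexpr std::size_t MAX_SIZE = 4096;
constexpr std::size_t ALIGNMENT = 8;

struct MiniArena {
    template <typename T>
    T* allocate(std::size_t n) {
        if (n == 0) {
            return nullptr;
        }
        std::size_t to_allocate = sizeof(T) * n;
        // Round the bump-pointer advance up to the next alignment boundary.
        std::size_t rounded = ALIGNMENT * ((to_allocate - 1) / ALIGNMENT + 1);
        std::size_t prev_allocated = allocated_;
        allocated_ += rounded;
        if (allocated_ > MAX_SIZE) {
            // Buffer exhausted: fall back to a plain heap allocation that the
            // arena still owns, so it is released when the arena is destroyed.
            overflow_.emplace_back(new char[to_allocate]);
            return reinterpret_cast<T*>(overflow_.back().get());
        }
        return reinterpret_cast<T*>(buffer_ + prev_allocated);
    }

private:
    alignas(ALIGNMENT) char buffer_[MAX_SIZE];
    std::size_t allocated_ = 0;
    std::vector<std::unique_ptr<char[]>> overflow_;
};

int main() {
    MiniArena arena;
    int* small = arena.allocate<int>(16);      // fits: served from the buffer
    int* large = arena.allocate<int>(100000);  // too big: heap fallback
    small[0] = 1;
    large[99999] = 2;
    std::printf("%d %d\n", small[0], large[99999]);
    return 0;
}

Because allocated_ keeps advancing even when a request spills to the heap, every later allocation from the same arena also takes the fallback path once the cap is crossed, and the vector of unique_ptr<char[]> releases all overflow blocks when the arena itself is destroyed. The same behavior can be read off the real allocate() in the first hunk below.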
@@ -222,10 +222,13 @@ struct Arena {
         }
         int to_allocate = sizeof(T)*n;
         int to_allocate_rounded = ALIGNMENT * ((to_allocate - 1) / ALIGNMENT + 1);
-        T* result = (T*) &buffer_[allocated_];
+        auto prev_allocated = allocated_;
         allocated_ += to_allocate_rounded;
-        AT_ASSERT(allocated_ <= ARENA_MAX_SIZE);
-        return result;
+        if (C10_UNLIKELY_OR_CONST(allocated_ > ARENA_MAX_SIZE)) {
+            overflow_.emplace_back(new char[to_allocate]);
+            return (T*) &overflow_.back()[0];
+        }
+        return (T*) (buffer_ + prev_allocated);
     }
     TensorRef autorelease(at::Tensor s) {
         auto ref = TensorRef(s);
@@ -251,6 +254,7 @@ private:
     char buffer_[ARENA_MAX_SIZE];
     Slice<TensorRef> ar_tensors_;
     Slice<py::handle> ar_objects_;
+    std::vector<std::unique_ptr<char[]>> overflow_;
 };

 template<typename T>
@@ -602,7 +602,14 @@ class TestMin(TestCase):
         t = torch.rand(3)[d]
         self.assertRaises(TypeError, lambda: t.order(wrong=3))

+    def test_big_split(self):
+        total = 0
+        l = []
+        while total < 6400:
+            l.append(torch.randint(2, 10, (1,)).item())
+            total += l[-1]
+        x = torch.randn(total, 1)
+        x.split(l, 0)

 skip_functorch_only = ['test_time_mm_fuse', 'test_attn_cuda']

 class TestMinFunctorchOnly(TestMin):