mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Generalize support of background thread in pinned allocator (#160505)
# Motivation https://github.com/pytorch/pytorch/pull/135524 introduced background-thread support in the pinned (host) allocator for CUDA only. This PR generalizes that support so that other backends, such as XPU, can use it as well. Pull Request resolved: https://github.com/pytorch/pytorch/pull/160505 Approved by: https://github.com/albanD
This commit is contained in:
parent
af3cabc55d
commit
8cfaf51d4e
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <c10/core/Allocator.h>
|
||||
#include <c10/core/AllocatorConfig.h>
|
||||
#include <c10/core/Stream.h>
|
||||
#include <c10/core/thread_pool.h>
|
||||
#include <c10/util/flat_hash_map.h>
|
||||
|
|
@ -351,7 +352,8 @@ struct CachingHostAllocatorImpl {
|
|||
}
|
||||
|
||||
virtual bool pinned_use_background_threads() {
|
||||
return false;
|
||||
return c10::CachingAllocator::AcceleratorAllocatorConfig::
|
||||
pinned_use_background_threads();
|
||||
}
|
||||
|
||||
virtual void copy_data(void* dest [[maybe_unused]], const void* src [[maybe_unused]], std::size_t count [[maybe_unused]]) const {
|
||||
|
|
|
|||
|
|
@ -161,11 +161,6 @@ struct CUDACachingHostAllocatorImpl
|
|||
return true;
|
||||
}
|
||||
|
||||
bool pinned_use_background_threads() override {
|
||||
return c10::CachingAllocator::AcceleratorAllocatorConfig::
|
||||
pinned_use_background_threads();
|
||||
}
|
||||
|
||||
EventPool::Event create_event_internal(DeviceIndex idx) {
|
||||
// Leak the event pool to avoid shutdown issue.
|
||||
static auto* event_pool = new EventPool();
|
||||
|
|
|
|||
|
|
@ -607,6 +607,17 @@ if __name__ == "__main__":
|
|||
z[0] = z[0] + 1.0
|
||||
self.assertEqual(z, x)
|
||||
|
||||
def test_background_thread_for_pin_memory(self):
|
||||
# Just ensure no crash
|
||||
torch._C._accelerator_setAllocatorSettings("pinned_use_background_threads:True")
|
||||
cpu_tensor = torch.randn(100)
|
||||
pin_tensor = cpu_tensor.pin_memory()
|
||||
xpu_tensor = pin_tensor.to(device="xpu", non_blocking=True)
|
||||
torch.xpu.synchronize()
|
||||
del pin_tensor
|
||||
gc.collect()
|
||||
self.assertEqual(xpu_tensor.cpu(), cpu_tensor)
|
||||
|
||||
|
||||
instantiate_device_type_tests(TestXpu, globals(), only_for="xpu", allow_xpu=True)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user