mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
Pull Request resolved: https://github.com/pytorch/pytorch/pull/149383 Approved by: https://github.com/malfet
62 lines
2.0 KiB
C++
62 lines
2.0 KiB
C++
#pragma once
|
|
|
|
#include <c10/core/Allocator.h>
|
|
|
|
namespace c10::CachingDeviceAllocator {
|
|
|
|
using namespace c10::CachingAllocator;
|
|
|
|
// Struct containing memory allocator summary statistics for a device.
|
|
struct DeviceStats {
|
|
// COUNT: allocations requested by client code
|
|
StatArray allocation;
|
|
// COUNT: number of allocated segments from device memory allocation.
|
|
StatArray segment;
|
|
// COUNT: number of active memory blocks (allocated or used by stream)
|
|
StatArray active;
|
|
// COUNT: number of inactive, split memory blocks (unallocated but can't be
|
|
// released via device memory deallocation)
|
|
StatArray inactive_split;
|
|
|
|
// SUM: bytes allocated by this memory alocator
|
|
StatArray allocated_bytes;
|
|
// SUM: bytes reserved by this memory allocator (both free and used)
|
|
StatArray reserved_bytes;
|
|
// SUM: bytes within active memory blocks
|
|
StatArray active_bytes;
|
|
// SUM: bytes within inactive, split memory blocks
|
|
StatArray inactive_split_bytes;
|
|
// SUM: bytes requested by client code
|
|
StatArray requested_bytes;
|
|
|
|
// COUNT: total number of failed calls to device malloc necessitating cache
|
|
// flushes.
|
|
int64_t num_alloc_retries = 0;
|
|
|
|
// COUNT: total number of OOMs (i.e. failed calls to device memory allocation
|
|
// after cache flush)
|
|
int64_t num_ooms = 0;
|
|
|
|
// COUNT: total number of oversize blocks allocated from pool
|
|
Stat oversize_allocations;
|
|
|
|
// COUNT: total number of oversize blocks requiring malloc
|
|
Stat oversize_segments;
|
|
|
|
// COUNT: total number of synchronize_and_free_events() calls
|
|
int64_t num_sync_all_streams = 0;
|
|
|
|
// COUNT: total number of device memory allocation calls. This includes both
|
|
// mapped and malloced memory.
|
|
int64_t num_device_alloc = 0;
|
|
|
|
// COUNT: total number of device memory deallocation calls. This includes both
|
|
// un-mapped and free memory.
|
|
int64_t num_device_free = 0;
|
|
|
|
// SIZE: maximum block size that is allowed to be split.
|
|
int64_t max_split_size = 0;
|
|
};
|
|
|
|
} // namespace c10::CachingDeviceAllocator
|