pytorch/c10/core/CachingDeviceAllocator.h
2025-03-24 03:06:27 +00:00

62 lines
2.0 KiB
C++

#pragma once
#include <c10/core/Allocator.h>
namespace c10::CachingDeviceAllocator {
using namespace c10::CachingAllocator;
// Summary statistics that a memory allocator reports for a single device.
// Every field comment is tagged COUNT, SUM, or SIZE to indicate the kind of
// quantity the field holds.
struct DeviceStats {
  // COUNT: allocations that client code has requested.
  StatArray allocation;

  // COUNT: segments obtained through device memory allocation.
  StatArray segment;

  // COUNT: memory blocks that are active, i.e. allocated or in use by a
  // stream.
  StatArray active;

  // COUNT: split memory blocks that are inactive (not allocated, yet not
  // releasable through device memory deallocation).
  StatArray inactive_split;

  // SUM: bytes this memory allocator has allocated.
  StatArray allocated_bytes;

  // SUM: bytes this memory allocator has reserved, counting both free and
  // used.
  StatArray reserved_bytes;

  // SUM: bytes held in active memory blocks.
  StatArray active_bytes;

  // SUM: bytes held in inactive, split memory blocks.
  StatArray inactive_split_bytes;

  // SUM: bytes that client code has requested.
  StatArray requested_bytes;

  // COUNT: failed device malloc calls that forced a cache flush, in total.
  int64_t num_alloc_retries{0};

  // COUNT: OOMs in total, i.e. device memory allocation calls that still
  // failed after a cache flush.
  int64_t num_ooms{0};

  // COUNT: oversize blocks allocated from the pool, in total.
  Stat oversize_allocations;

  // COUNT: oversize blocks that required a malloc, in total.
  Stat oversize_segments;

  // COUNT: calls to synchronize_and_free_events(), in total.
  int64_t num_sync_all_streams{0};

  // COUNT: device memory allocation calls in total, covering both mapped and
  // malloced memory.
  int64_t num_device_alloc{0};

  // COUNT: device memory deallocation calls in total, covering both un-mapped
  // and freed memory.
  int64_t num_device_free{0};

  // SIZE: largest block size that may still be split.
  int64_t max_split_size{0};
};
} // namespace c10::CachingDeviceAllocator