mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: This is a re-attempt to land the iwyu header changes, by taking the diff from [PR 100304](https://github.com/pytorch/pytorch/pull/100304), and adding the bare minimal changes to make the diff build correctly in the internal builds. X-link: https://github.com/facebookresearch/pytorch3d/pull/1541 X-link: https://github.com/fairinternal/pytorch3d/pull/44 - Re-work D45769819 to fix header inclusions in c10 Test Plan: ``` buck2 build --no-remote-cache mode/dev-nosan //caffe2/c10/... buck2 build --no-remote-cache mode/dev-nosan //deeplearning/fbgemm/fbgemm_gpu/... buck2 build mode/dev-nosan //vision/fair/pytorch3d/pytorch3d:_C ``` Reviewed By: malfet Differential Revision: D45920611 Pull Request resolved: https://github.com/pytorch/pytorch/pull/101846 Approved by: https://github.com/malfet, https://github.com/Skylion007
150 lines
4.6 KiB
C++
150 lines
4.6 KiB
C++
#pragma once
|
|
|
|
#include <c10/macros/Export.h>
#include <c10/util/flat_hash_map.h>

#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>
|
|
|
|
namespace c10 {
|
|
|
|
/*
|
|
* Given a sequence of allocations in a thread, AllocationPlan records
|
|
* 1. size of each allocation
|
|
* 2. Lifetime of each allocation.
|
|
* 3. allocation offsets: Memory offset for each allocation in a single blob of
|
|
* memory
|
|
* 4. Total size of a blob of memory required to satisfy all the allocations.
|
|
*/
|
|
class C10_API AllocationPlan {
|
|
private:
|
|
// Records size of each allocation by their sequential allocation ids.
|
|
std::vector<uint64_t> allocation_sizes;
|
|
// This maps one allocation id (X) to another allocation id (Y).
|
|
// Allocation X is alive until allocation Y. From allocation Y onwards
|
|
// allocation X is not referenced.
|
|
// Thus Y is the id of the first allocation after X is freed.
|
|
// NB: When an allocation is recorded, along with recording its size,
|
|
// we also set the lifetime to be numeric_limits::max()
|
|
// This is to track allocations that are made during the scope of
|
|
// profiling but were not freed until after the scope ended.
|
|
// Such allocations are not managed by profiling allocator.
|
|
std::vector<uint64_t> allocation_lifetimes;
|
|
// Maps an allocation to some offset in a blob of memory.
|
|
std::vector<uint64_t> allocation_offsets;
|
|
uint64_t total_size{0};
|
|
void clear();
|
|
friend class AllocationPlanner;
|
|
friend class CPUProfilingAllocator;
|
|
};
|
|
|
|
/*
|
|
* Map of memory ptr to allocation id. This is auxiliary information only
|
|
* used to establish lifetime of allocations.
|
|
*/
|
|
class C10_API AllocationPlanner {
|
|
private:
|
|
AllocationPlan* allocation_plan_{nullptr};
|
|
// Maps allocated ptr to its allocation id.
|
|
// This is used when freeing the memory to look up the allocation id
|
|
// in order to establish the lifetime of a particular allocation.
|
|
ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_;
|
|
uint64_t allocation_id_{0};
|
|
bool validation_mode_{false};
|
|
|
|
bool validate_allocation(const uint64_t size, const void* ptr);
|
|
bool validate_free(const void* ptr);
|
|
|
|
public:
|
|
bool validation_success{true};
|
|
|
|
AllocationPlanner() = delete;
|
|
AllocationPlanner(AllocationPlan* plan, bool validate = false)
|
|
: allocation_plan_(plan), validation_mode_(validate) {}
|
|
void record_allocation(const uint64_t size, const void* ptr);
|
|
void record_free(const void* ptr);
|
|
void formulate_plan();
|
|
void clear();
|
|
};
|
|
|
|
// NOT THREAD SAFE profiling allocator.
|
|
class C10_API CPUProfilingAllocator {
|
|
private:
|
|
const AllocationPlan* plan_{nullptr};
|
|
uint64_t allocation_id_{0};
|
|
uint64_t current_size_{0};
|
|
void* blob_{nullptr};
|
|
ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_;
|
|
|
|
public:
|
|
~CPUProfilingAllocator();
|
|
void set_plan(const AllocationPlan* plan);
|
|
void unset_plan();
|
|
void* allocate(const size_t bytes);
|
|
void free(void* const ptr);
|
|
};
|
|
|
|
/*
|
|
* Usage: Profile allocations made by one run of the model.
|
|
* AllocationPlan plan;
|
|
* {
|
|
* WithProfileAllocationGuard profile_guard(&plan);
|
|
* module.forward(...);
|
|
* }
|
|
* plan now contains allocation plan.
|
|
*/
|
|
class C10_API WithProfileAllocationsGuard {
|
|
public:
|
|
WithProfileAllocationsGuard(AllocationPlan* plan);
|
|
~WithProfileAllocationsGuard();
|
|
|
|
private:
|
|
std::unique_ptr<AllocationPlanner> planner_;
|
|
};
|
|
|
|
/*
|
|
* Usage: Validate allocation plan made with WithProfileAllocationGuard
|
|
* bool plan_validation_success, success = true;
|
|
* for (some number of representative inputs)
|
|
* {
|
|
* WithValidateAllocationPlanGuard(&plan, &plan_validation_success);
|
|
* module.forward(...);
|
|
* success = success && plan_validation_success;
|
|
* }
|
|
* success == true means allocations are according to plan
|
|
* else for some inputs allocation pattern changed.
|
|
*/
|
|
class C10_API WithValidateAllocationPlanGuard {
|
|
public:
|
|
WithValidateAllocationPlanGuard(AllocationPlan* plan, bool* success);
|
|
~WithValidateAllocationPlanGuard();
|
|
|
|
private:
|
|
std::unique_ptr<AllocationPlanner> planner_;
|
|
bool* success_;
|
|
};
|
|
|
|
// Returns a pointer to an AllocationPlanner; going by the name, the one
// associated with the calling thread.
// NOTE(review): presumably nullptr when no profiling/validation guard is
// active on this thread — confirm against the definition.
AllocationPlanner* GetThreadLocalAllocationPlanner();
|
|
|
|
/*
|
|
* Usage: Allocate tensors accordingly to allocation plan
|
|
* First make allocation plan.
|
|
* See WithProfileAllocationsGuard usage.
|
|
* Second validate allocation plan.
|
|
* See WithValidateAllocationPlanGuard usage.
|
|
* CPUProfilingAllocator profiling_allocator;
|
|
* {
|
|
* WithProfilingAllocatorGuard allocator_guard(&profiling_allocator, &plan);
|
|
* module.forward(...);
|
|
* }
|
|
*/
|
|
class C10_API WithProfilingAllocatorGuard {
|
|
public:
|
|
WithProfilingAllocatorGuard(
|
|
CPUProfilingAllocator* allocator,
|
|
const AllocationPlan* plan);
|
|
~WithProfilingAllocatorGuard();
|
|
};
|
|
|
|
// Returns a pointer to a CPUProfilingAllocator; going by the name, the one
// associated with the calling thread.
// NOTE(review): presumably nullptr when no WithProfilingAllocatorGuard is
// active on this thread — confirm against the definition.
CPUProfilingAllocator* GetThreadLocalProfilingAllocator();
|
|
|
|
} // namespace c10
|