mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
Summary: Refactored CUDAEventCache from ProcessGroupNCCL.hpp/.cpp into dedicated header and implementation files for better code organization and maintainability. Split out CUDAEventCache into: - New header file: CUDAEventCache.hpp - New implementation file: CUDAEventCache.cpp - Updated build_variables.bzl to include the new file This change improves code maintainability, readability, and follows better code organization practices. --- > Generated by [Confucius Code Assist (CCA)](https://www.internalfb.com/wiki/Confucius/Analect/Shared_Analects/Confucius_Code_Assist_(CCA)/) [Session](https://www.internalfb.com/confucius?session_id=61b9029a-636b-11f0-9d9a-f1bcc55be1ce&tab=Chat), [Trace](https://www.internalfb.com/confucius?session_id=61b9029a-636b-11f0-9d9a-f1bcc55be1ce&tab=Trace) Test Plan: Verified build with: ``` buck build //caffe2/test/distributed:c10d ``` --- > Generated by [Confucius Code Assist (CCA)](https://www.internalfb.com/wiki/Confucius/Analect/Shared_Analects/Confucius_Code_Assist_(CCA)/) [Session](https://www.internalfb.com/confucius?session_id=61b9029a-636b-11f0-9d9a-f1bcc55be1ce&tab=Chat), [Trace](https://www.internalfb.com/confucius?session_id=61b9029a-636b-11f0-9d9a-f1bcc55be1ce&tab=Trace) Pull Request resolved: https://github.com/pytorch/pytorch/pull/158616 Approved by: https://github.com/fduwjj
30 lines
744 B
C++
30 lines
744 B
C++
#pragma once
|
|
|
|
#include <array>
|
|
#include <deque>
|
|
#include <memory>
|
|
#include <mutex>
|
|
|
|
#include <ATen/cuda/CUDAEvent.h>
|
|
#include <c10/macros/Export.h>
|
|
|
|
namespace c10d {
|
|
|
|
class TORCH_API CUDAEventCache
|
|
: public std::enable_shared_from_this<CUDAEventCache> {
|
|
public:
|
|
CUDAEventCache();
|
|
std::shared_ptr<at::cuda::CUDAEvent> create(bool timing);
|
|
static std::shared_ptr<CUDAEventCache> get(at::DeviceIndex device);
|
|
|
|
private:
|
|
std::mutex cacheMutex_;
|
|
// NOTE: We intentionally store raw pointers so that
|
|
// we do not attempt to destroy the event objects on process exit,
|
|
// because cuda may be gone.
|
|
std::array<std::deque<at::cuda::CUDAEvent*>, 2>
|
|
eventsArray_; // 0 for timing=false, 1 for timing=true
|
|
};
|
|
|
|
} // namespace c10d
|