mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Add a stack of start counter values, and attribute each disable to the last enable.
Differential Revision: [D40539212](https://our.internmc.facebook.com/intern/diff/D40539212/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/87904
Approved by: https://github.com/SS-JIA
73 lines
1.4 KiB
C++
73 lines
1.4 KiB
C++
#pragma once
|
|
|
|
#if defined(__ANDROID__) || defined(__linux__)
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
#include <sys/syscall.h>
|
|
|
|
#include <linux/perf_event.h>
|
|
|
|
#endif /* __ANDROID__ || __linux__ */
|
|
|
|
#include <torch/csrc/profiler/perf.h>
|
|
|
|
namespace torch {
|
|
namespace profiler {
|
|
namespace impl {
|
|
namespace linux_perf {
|
|
|
|
/*
|
|
* PerfEvent
|
|
* ---------
|
|
*/
|
|
|
|
/*
 * Issues PERF_EVENT_IOC_DISABLE on this event's file descriptor (fd_).
 *
 * The request argument is 0, and the ioctl result is intentionally discarded.
 * On platforms other than Linux/Android the function body is empty.
 */
inline void PerfEvent::Disable() const {
#if defined(__linux__) || defined(__ANDROID__)
  static_cast<void>(::ioctl(fd_, PERF_EVENT_IOC_DISABLE, 0));
#endif /* __linux__ || __ANDROID__ */
}
|
|
|
|
/*
 * Issues PERF_EVENT_IOC_ENABLE on this event's file descriptor (fd_).
 *
 * The request argument is 0, and the ioctl result is intentionally discarded.
 * On platforms other than Linux/Android the function body is empty.
 */
inline void PerfEvent::Enable() const {
#if defined(__linux__) || defined(__ANDROID__)
  static_cast<void>(::ioctl(fd_, PERF_EVENT_IOC_ENABLE, 0));
#endif /* __linux__ || __ANDROID__ */
}
|
|
|
|
/*
 * Issues PERF_EVENT_IOC_RESET on this event's file descriptor (fd_).
 *
 * The request argument is 0, and the ioctl result is intentionally discarded.
 * On platforms other than Linux/Android the function body is empty.
 */
inline void PerfEvent::Reset() const {
#if defined(__linux__) || defined(__ANDROID__)
  static_cast<void>(::ioctl(fd_, PERF_EVENT_IOC_RESET, 0));
#endif /* __linux__ || __ANDROID__ */
}
|
|
|
|
/*
|
|
* PerfProfiler
|
|
* ------------
|
|
*/
|
|
|
|
/*
 * Returns the number of counter ticks between two raw 64-bit readings.
 *
 *   start - the reading taken first
 *   end   - the reading taken afterwards
 *
 * Both operands are unsigned, so the subtraction is evaluated modulo 2^64.
 * That one expression gives the right distance both when end >= start and
 * when the counter wrapped once between the two readings (end < start).
 * A hand-written wrap branch of the form `end + (max - start)` is off by one:
 * it omits the tick that takes the counter from max to 0, so for
 * start == max and end == 0 it would yield 0 instead of 1.
 */
inline uint64_t PerfProfiler::CalcDelta(uint64_t start, uint64_t end) const {
  // Unsigned wrap-around is well defined; no overflow special case is needed.
  return end - start;
}
|
|
|
|
inline void PerfProfiler::StartCounting() const {
|
|
for (auto& e : events_) {
|
|
e.Enable();
|
|
}
|
|
}
|
|
|
|
inline void PerfProfiler::StopCounting() const {
|
|
for (auto& e : events_) {
|
|
e.Disable();
|
|
}
|
|
}
|
|
|
|
} // namespace linux_perf
|
|
} // namespace impl
|
|
} // namespace profiler
|
|
} // namespace torch
|