mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
[profiler] Directly use end_ns to create the FunctionEvent instead of using start_ns + duration_ns in pytorch profiler post processing for checking parent-child precisely (#129554)
Use the raw end_ns directly, instead of the sum of start_ns and duration_ns, in order to avoid negative CPU time in the profiler. Fixes https://github.com/pytorch/pytorch/issues/101861 Pull Request resolved: https://github.com/pytorch/pytorch/pull/129554 Approved by: https://github.com/gujinghui, https://github.com/aaronenyeshi
This commit is contained in:
parent
ff026f3d0a
commit
a028e5862d
|
|
@ -57,6 +57,7 @@ class _KinetoEvent:
|
|||
def device_index(self) -> int: ...
|
||||
def device_resource_id(self) -> int: ...
|
||||
# Absolute start time (since unix epoch) in ns.
def start_ns(self) -> int: ...
|
||||
# Absolute end time (since unix epoch) in ns.
def end_ns(self) -> int: ...
|
||||
# Duration in ns.
def duration_ns(self) -> int: ...
|
||||
def is_async(self) -> bool: ...
|
||||
def linked_correlation_id(self) -> int: ...
|
||||
|
|
|
|||
|
|
@ -503,8 +503,8 @@ class profile:
|
|||
if _filter_name(kineto_event.name()):
|
||||
continue
|
||||
rel_start_ns = kineto_event.start_ns() - trace_start_ns
|
||||
rel_end_ns = rel_start_ns + kineto_event.duration_ns()
|
||||
abs_end_ns = kineto_event.start_ns() + kineto_event.duration_ns()
|
||||
rel_end_ns = kineto_event.end_ns() - trace_start_ns
|
||||
abs_end_ns = kineto_event.end_ns()
|
||||
|
||||
cpu_memory_usage = 0
|
||||
device_memory_usage = 0
|
||||
|
|
|
|||
|
|
@ -203,6 +203,8 @@ PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject* unused) {
|
|||
.def("sequence_nr", [](const KinetoEvent& e) { return e.sequenceNr(); })
|
||||
// absolute start time (since unix epoch) in ns
|
||||
.def("start_ns", [](const KinetoEvent& e) { return e.startNs(); })
|
||||
// absolute end time (since unix epoch) in ns
|
||||
.def("end_ns", [](const KinetoEvent& e) { return e.endNs(); })
|
||||
// duration in ns
|
||||
.def("duration_ns", [](const KinetoEvent& e) { return e.durationNs(); })
|
||||
// used for correlation between high-level PyTorch events
|
||||
|
|
|
|||
|
|
@ -786,6 +786,10 @@ const c10::ArrayRef<std::string> KinetoEvent::moduleHierarchy() const {
|
|||
return {};
|
||||
}
|
||||
|
||||
// Returns the event's end time in nanoseconds by forwarding
// result_->endTimeNS(). This is the value exposed to Python as `end_ns`,
// where it is described as the absolute end time (since unix epoch) in ns.
uint64_t KinetoEvent::endNs() const {
|
||||
return result_->endTimeNS();
|
||||
}
|
||||
|
||||
// Returns the event's duration in nanoseconds, computed as
// result_->endTimeNS() minus result_->start_time_ns_.
// NOTE(review): the difference is returned as uint64_t; this presumably
// relies on endTimeNS() never being smaller than start_time_ns_ -- confirm
// against the types and invariants of the underlying result.
uint64_t KinetoEvent::durationNs() const {
|
||||
return (result_->endTimeNS() - result_->start_time_ns_);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ struct TORCH_API KinetoEvent {
|
|||
int deviceIndex() const;
|
||||
int64_t nBytes() const;
|
||||
uint64_t startNs() const;
|
||||
uint64_t endNs() const;
|
||||
uint64_t durationNs() const;
|
||||
bool isAsync() const;
|
||||
uint64_t correlationId() const;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user