[profiler] Use end_ns directly to create the FunctionEvent, instead of start_ns + duration_ns, in PyTorch profiler post-processing so that parent-child relationships are checked precisely (#129554)

Use the raw end_ns directly, instead of the sum of start_ns and duration_ns, to avoid negative CPU time in the profiler.

Fixes https://github.com/pytorch/pytorch/issues/101861

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129554
Approved by: https://github.com/gujinghui, https://github.com/aaronenyeshi
This commit is contained in:
Chen, Zejun 2024-06-27 10:46:03 +00:00 committed by PyTorch MergeBot
parent ff026f3d0a
commit a028e5862d
5 changed files with 10 additions and 2 deletions

View File

@ -57,6 +57,7 @@ class _KinetoEvent:
# Accessor stubs for a Kineto profiler event (type signatures only; bodies are elided).
def device_index(self) -> int: ...
def device_resource_id(self) -> int: ...
# Absolute start time (since unix epoch) in nanoseconds.
def start_ns(self) -> int: ...
# Absolute end time (since unix epoch) in nanoseconds.
def end_ns(self) -> int: ...
# Event duration in nanoseconds.
def duration_ns(self) -> int: ...
def is_async(self) -> bool: ...
def linked_correlation_id(self) -> int: ...

View File

@ -503,8 +503,8 @@ class profile:
if _filter_name(kineto_event.name()):
continue
rel_start_ns = kineto_event.start_ns() - trace_start_ns
rel_end_ns = rel_start_ns + kineto_event.duration_ns()
abs_end_ns = kineto_event.start_ns() + kineto_event.duration_ns()
rel_end_ns = kineto_event.end_ns() - trace_start_ns
abs_end_ns = kineto_event.end_ns()
cpu_memory_usage = 0
device_memory_usage = 0

View File

@ -203,6 +203,8 @@ PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject* unused) {
.def("sequence_nr", [](const KinetoEvent& e) { return e.sequenceNr(); })
// absolute start time (since unix epoch) in ns
.def("start_ns", [](const KinetoEvent& e) { return e.startNs(); })
// absolute end time (since unix epoch) in ns
.def("end_ns", [](const KinetoEvent& e) { return e.endNs(); })
// duration in ns
.def("duration_ns", [](const KinetoEvent& e) { return e.durationNs(); })
// used for correlation between high-level PyTorch events

View File

@ -786,6 +786,10 @@ const c10::ArrayRef<std::string> KinetoEvent::moduleHierarchy() const {
return {};
}
// Returns the event's end timestamp in nanoseconds, exactly as reported by
// the underlying result (no start + duration arithmetic is involved).
uint64_t KinetoEvent::endNs() const {
  const auto end_time_ns = result_->endTimeNS();
  return end_time_ns;
}
// Returns the event's duration in nanoseconds, computed as the underlying
// result's end timestamp minus its start timestamp.
uint64_t KinetoEvent::durationNs() const {
  const auto finish_ns = result_->endTimeNS();
  const auto begin_ns = result_->start_time_ns_;
  return finish_ns - begin_ns;
}

View File

@ -49,6 +49,7 @@ struct TORCH_API KinetoEvent {
int deviceIndex() const;
int64_t nBytes() const;
// Absolute start time (since unix epoch) in nanoseconds.
uint64_t startNs() const;
// Absolute end time (since unix epoch) in nanoseconds.
uint64_t endNs() const;
// Duration in nanoseconds (end time minus start time).
uint64_t durationNs() const;
bool isAsync() const;
uint64_t correlationId() const;