mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
PG NCCL cleanup: remove usage of completed_ in WorkNCCL copies (#59899)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59899

Test Plan: Imported from OSS

Reviewed By: cbalioglu, osalpekar

Differential Revision: D29080299

Pulled By: agolynski

fbshipit-source-id: 9ae368f91e81f19471e0a20fc913d8e9df1b9dec
This commit is contained in:
parent
010f4b6f2d
commit
ed1da5be21
|
|
@ -249,7 +249,6 @@ ProcessGroupNCCL::WorkNCCL::WorkNCCL(const WorkNCCL& w)
|
|||
blockingWait_(w.blockingWait_),
|
||||
opTimeout_(w.opTimeout_),
|
||||
workStartTime_(w.workStartTime_) {
|
||||
completed_ = w.completed_;
|
||||
exception_ = w.exception_;
|
||||
}
|
||||
|
||||
|
|
@ -319,7 +318,6 @@ void ProcessGroupNCCL::WorkNCCL::checkAndThrowException() {
|
|||
|
||||
void ProcessGroupNCCL::WorkNCCL::handleNCCLGuard() {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
completed_ = true;
|
||||
if (exception_) {
|
||||
auto exceptionMsg = c10::str(
|
||||
"Some NCCL operations have failed or timed out. Due to the ",
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user