PG NCCL cleanup: remove usage of completed_ in WorkNCCL copies (#59899)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/59899

Test Plan: Imported from OSS

Reviewed By: cbalioglu, osalpekar

Differential Revision: D29080299

Pulled By: agolynski

fbshipit-source-id: 9ae368f91e81f19471e0a20fc913d8e9df1b9dec
This commit is contained in:
Alexander Golynski 2021-06-17 09:03:10 -07:00 committed by Facebook GitHub Bot
parent 010f4b6f2d
commit ed1da5be21

View File

@@ -249,7 +249,6 @@ ProcessGroupNCCL::WorkNCCL::WorkNCCL(const WorkNCCL& w)
blockingWait_(w.blockingWait_),
opTimeout_(w.opTimeout_),
workStartTime_(w.workStartTime_) {
completed_ = w.completed_;
exception_ = w.exception_;
}
@@ -319,7 +318,6 @@ void ProcessGroupNCCL::WorkNCCL::checkAndThrowException() {
void ProcessGroupNCCL::WorkNCCL::handleNCCLGuard() {
std::lock_guard<std::mutex> lock(mutex_);
completed_ = true;
if (exception_) {
auto exceptionMsg = c10::str(
"Some NCCL operations have failed or timed out. Due to the ",