mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Add warning for incorrect grad results at world size 1 (#154928)
Add warning for the issue discussed at https://github.com/pytorch/pytorch/issues/144045
Pull Request resolved: https://github.com/pytorch/pytorch/pull/154928
Approved by: https://github.com/weifengpy
This commit is contained in:
parent
eb4cf59ecd
commit
82fb904140
|
|
@ -569,6 +569,7 @@ def _get_gradient_divide_factors(
|
|||
) -> Union[tuple[None, None], tuple[float, float]]:
|
||||
# For fp32/bf16, we do not need to worry about overflow/underflow, so we
|
||||
# use NCCL's built-in division to avoid separate div kernels
|
||||
# Warning: NCCL ReduceOp.AVG may produce incorrect results with world size 1.
|
||||
if reduce_dtype in (torch.float32, torch.bfloat16) and device_type != "mtia":
|
||||
return None, None
|
||||
data_parallel_size = reduce_scatter_group.size()
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user