diff --git a/torch/nn/utils/clip_grad.py b/torch/nn/utils/clip_grad.py
index 976dceaccf5..152cf29e469 100644
--- a/torch/nn/utils/clip_grad.py
+++ b/torch/nn/utils/clip_grad.py
@@ -197,12 +197,12 @@ def clip_grad_norm_(
         parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
             single Tensor that will have gradients normalized
         max_norm (float): max norm of the gradients
-        norm_type (float): type of the used p-norm. Can be ``'inf'`` for
-            infinity norm.
-        error_if_nonfinite (bool): if True, an error is thrown if the total
+        norm_type (float, optional): type of the used p-norm. Can be ``'inf'`` for
+            infinity norm. Default: 2.0
+        error_if_nonfinite (bool, optional): if True, an error is thrown if the total
             norm of the gradients from :attr:`parameters` is ``nan``,
-            ``inf``, or ``-inf``. Default: False (will switch to True in the future)
-        foreach (bool): use the faster foreach-based implementation.
+            ``inf``, or ``-inf``. Default: False
+        foreach (bool, optional): use the faster foreach-based implementation.
             If ``None``, use the foreach implementation for CUDA and CPU native tensors and silently
             fall back to the slow implementation for other device types.
             Default: ``None``
@@ -258,7 +258,7 @@ def clip_grad_value_(
         clip_value (float): maximum allowed value of the gradients.
             The gradients are clipped in the range
             :math:`\left[\text{-clip\_value}, \text{clip\_value}\right]`
-        foreach (bool): use the faster foreach-based implementation
+        foreach (bool, optional): use the faster foreach-based implementation
             If ``None``, use the foreach implementation for CUDA and CPU native tensors and silently
             fall back to the slow implementation for other device types.
             Default: ``None``
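
For reference, a minimal usage sketch of the two functions whose docstrings this patch touches (not part of the diff; the model, optimizer, and clipping thresholds below are placeholders chosen only to show where the calls sit relative to backward() and step()):

# usage sketch, not part of the patch
import torch
import torch.nn as nn

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

loss = model(torch.randn(4, 10)).sum()
loss.backward()

# Rescale the combined gradient norm; norm_type defaults to 2.0,
# error_if_nonfinite to False, and foreach to None, per the docstring above.
total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

# Alternatively, clamp each gradient element into [-0.5, 0.5].
torch.nn.utils.clip_grad_value_(model.parameters(), clip_value=0.5)

optimizer.step()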