Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00
Revert "Add label_smoothing param in nn.BCELoss and nn.BCEWithLogitsLoss (#150282)"
This reverts commit f990490a23.
Reverted https://github.com/pytorch/pytorch/pull/150282 on behalf of https://github.com/facebook-github-bot due to Diff reverted internally ([comment](https://github.com/pytorch/pytorch/pull/150282#issuecomment-3182844949))
parent 6e8865fbc1
commit 641ee74781
@@ -3472,7 +3472,6 @@ def binary_cross_entropy(
     size_average: Optional[bool] = None,
     reduce: Optional[bool] = None,
     reduction: str = "mean",
-    label_smoothing: float = 0.0,
 ) -> Tensor:
     r"""Compute Binary Cross Entropy between the target and input probabilities.
 
@@ -3491,11 +3490,9 @@ def binary_cross_entropy(
             elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
             and :attr:`reduce` are in the process of being deprecated, and in the meantime,
             specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``
-        label_smoothing (float, optional): A float in [0.0, 1.0]. Specifies the amount
-            of smoothing when computing the loss, where 0.0 means no smoothing. The targets
-            become a mixture of the original ground truth and a uniform distribution as described in
-            `Rethinking the Inception Architecture for Computer Vision <https://arxiv.org/abs/1512.00567>`__. Default: :math:`0.0`.
 
     Examples::
 
         >>> input = torch.randn(3, 2, requires_grad=True)
         >>> target = torch.rand(3, 2, requires_grad=False)
         >>> loss = F.binary_cross_entropy(torch.sigmoid(input), target)
@@ -3511,7 +3508,6 @@ def binary_cross_entropy(
             size_average=size_average,
             reduce=reduce,
             reduction=reduction,
-            label_smoothing=label_smoothing,
         )
     if size_average is not None or reduce is not None:
         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
@@ -3527,13 +3523,6 @@ def binary_cross_entropy(
         new_size = _infer_size(target.size(), weight.size())
         weight = weight.expand(new_size)
 
-    assert 0 <= label_smoothing <= 1, (
-        f"label_smoothing must be between 0.0 and 1.0. Got: {label_smoothing}"
-    )
-
-    if label_smoothing > 0:
-        target = target * (1 - label_smoothing) + (1 - target) * label_smoothing
-
     return torch._C._nn.binary_cross_entropy(input, target, weight, reduction_enum)
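The block removed above is the whole of the feature's runtime behavior: the targets are linearly mixed with their complement before the native kernel is called. As a standalone sketch of the transformation being reverted (the helper name `smooth_binary_targets` is mine, not part of torch):

```python
import torch

def smooth_binary_targets(target: torch.Tensor, label_smoothing: float) -> torch.Tensor:
    # Linear interpolation between the target and its complement:
    # a hard 1.0 becomes 1 - label_smoothing, a hard 0.0 becomes label_smoothing.
    assert 0 <= label_smoothing <= 1, (
        f"label_smoothing must be between 0.0 and 1.0. Got: {label_smoothing}"
    )
    return target * (1 - label_smoothing) + (1 - target) * label_smoothing

print(smooth_binary_targets(torch.tensor([0.0, 1.0]), 0.15))
# tensor([0.1500, 0.8500])
```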
@@ -3545,7 +3534,6 @@ def binary_cross_entropy_with_logits(
     reduce: Optional[bool] = None,
     reduction: str = "mean",
     pos_weight: Optional[Tensor] = None,
-    label_smoothing: float = 0.0,
 ) -> Tensor:
     r"""Compute Binary Cross Entropy between target and input logits.
 
@@ -3572,11 +3560,9 @@ def binary_cross_entropy_with_logits(
             [C, H, W] the same pos_weights across the batch. To apply the same positive weight
             along all spatial dimensions for a 2D multi-class target [C, H, W] use: [C, 1, 1].
             Default: ``None``
-        label_smoothing (float, optional): A float in [0.0, 1.0]. Specifies the amount
-            of smoothing when computing the loss, where 0.0 means no smoothing. The targets
-            become a mixture of the original ground truth and a uniform distribution as described in
-            `Rethinking the Inception Architecture for Computer Vision <https://arxiv.org/abs/1512.00567>`__. Default: :math:`0.0`.
 
     Examples::
 
         >>> input = torch.randn(3, requires_grad=True)
         >>> target = torch.empty(3).random_(2)
         >>> loss = F.binary_cross_entropy_with_logits(input, target)
@@ -3593,7 +3579,6 @@ def binary_cross_entropy_with_logits(
             reduce=reduce,
             reduction=reduction,
             pos_weight=pos_weight,
-            label_smoothing=label_smoothing,
         )
     if size_average is not None or reduce is not None:
         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
@@ -3605,13 +3590,6 @@ def binary_cross_entropy_with_logits(
             f"Target size ({target.size()}) must be the same as input size ({input.size()})"
         )
 
-    assert 0 <= label_smoothing <= 1, (
-        f"label_smoothing must be between 0.0 and 1.0. Got: {label_smoothing}"
-    )
-
-    if label_smoothing > 0:
-        target = target * (1 - label_smoothing) + (1 - target) * label_smoothing
-
     return torch.binary_cross_entropy_with_logits(
         input, target, weight, pos_weight, reduction_enum
     )
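With the parameter reverted, callers who relied on it can reproduce the effect by smoothing the targets themselves before the call; a minimal sketch, assuming the same mixing rule the reverted code applied internally (the 0.1 value is illustrative, not a torch default):

```python
import torch
import torch.nn.functional as F

label_smoothing = 0.1
input = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)

# Same target mixing the reverted code performed before the native call.
smoothed = target * (1 - label_smoothing) + (1 - target) * label_smoothing
loss = F.binary_cross_entropy_with_logits(input, smoothed)
loss.backward()
```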
@@ -134,7 +134,6 @@ def binary_cross_entropy_with_logits(
     reduce: bool | None = ...,
     reduction: str = ...,
     pos_weight: Tensor | None = ...,
-    label_smoothing: float = ...,
 ) -> Tensor: ...
 
 __all__ += ["binary_cross_entropy_with_logits"]
@@ -146,7 +145,6 @@ def binary_cross_entropy(
     size_average: bool | None = ...,
     reduce: bool | None = ...,
     reduction: str = ...,
-    label_smoothing: float = ...,
 ) -> Tensor: ...
 
 __all__ += ["binary_cross_entropy"]
@@ -692,10 +692,6 @@ class BCELoss(_WeightedLoss):
             elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
             and :attr:`reduce` are in the process of being deprecated, and in the meantime,
             specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``
-        label_smoothing (float, optional): A float in [0.0, 1.0]. Specifies the amount
-            of smoothing when computing the loss, where 0.0 means no smoothing. The targets
-            become a mixture of the original ground truth and a uniform distribution as described in
-            `Rethinking the Inception Architecture for Computer Vision <https://arxiv.org/abs/1512.00567>`__. Default: :math:`0.0`.
 
     Shape:
         - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
@@ -721,21 +717,15 @@ class BCELoss(_WeightedLoss):
         size_average=None,
         reduce=None,
         reduction: str = "mean",
-        label_smoothing: float = 0.0,
     ) -> None:
         super().__init__(weight, size_average, reduce, reduction)
-        self.label_smoothing = label_smoothing
 
     def forward(self, input: Tensor, target: Tensor) -> Tensor:
         """
         Runs the forward pass.
         """
         return F.binary_cross_entropy(
-            input,
-            target,
-            weight=self.weight,
-            reduction=self.reduction,
-            label_smoothing=self.label_smoothing,
+            input, target, weight=self.weight, reduction=self.reduction
         )
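For module-based code, the removed behavior can be recovered without the reverted parameter by wrapping nn.BCELoss. The SmoothedBCELoss subclass below is a hypothetical workaround of mine, not part of torch, assuming the same mixing rule as the reverted forward:

```python
import torch
from torch import nn, Tensor

class SmoothedBCELoss(nn.BCELoss):
    # Hypothetical subclass that re-applies the reverted target smoothing
    # before delegating to the stock BCELoss forward.
    def __init__(self, label_smoothing: float = 0.0, **kwargs) -> None:
        super().__init__(**kwargs)
        assert 0 <= label_smoothing <= 1
        self.label_smoothing = label_smoothing

    def forward(self, input: Tensor, target: Tensor) -> Tensor:
        if self.label_smoothing > 0:
            target = target * (1 - self.label_smoothing) + (1 - target) * self.label_smoothing
        return super().forward(input, target)

criterion = SmoothedBCELoss(label_smoothing=0.15)
loss = criterion(torch.sigmoid(torch.randn(3, 2, requires_grad=True)), torch.rand(3, 2))
loss.backward()
```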
@@ -825,10 +815,6 @@ class BCEWithLogitsLoss(_Loss):
             [C, H, W] the same pos_weights across the batch. To apply the same positive weight
             along all spatial dimensions for a 2D multi-class target [C, H, W] use: [C, 1, 1].
             Default: ``None``
-        label_smoothing (float, optional): A float in [0.0, 1.0]. Specifies the amount
-            of smoothing when computing the loss, where 0.0 means no smoothing. The targets
-            become a mixture of the original ground truth and a uniform distribution as described in
-            `Rethinking the Inception Architecture for Computer Vision <https://arxiv.org/abs/1512.00567>`__. Default: :math:`0.0`.
 
     Shape:
         - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
@@ -852,14 +838,12 @@ class BCEWithLogitsLoss(_Loss):
         reduce=None,
         reduction: str = "mean",
         pos_weight: Optional[Tensor] = None,
-        label_smoothing: float = 0.0,
     ) -> None:
         super().__init__(size_average, reduce, reduction)
         self.register_buffer("weight", weight)
         self.register_buffer("pos_weight", pos_weight)
         self.weight: Optional[Tensor]
         self.pos_weight: Optional[Tensor]
-        self.label_smoothing = label_smoothing
 
     def forward(self, input: Tensor, target: Tensor) -> Tensor:
         """Runs the forward pass."""
@@ -869,7 +853,6 @@ class BCEWithLogitsLoss(_Loss):
             self.weight,
             pos_weight=self.pos_weight,
             reduction=self.reduction,
-            label_smoothing=self.label_smoothing,
         )
 
@@ -488,7 +488,7 @@ def get_testing_overrides() -> dict[Callable, Callable]:
     torch.bernoulli: lambda input, generator=None, out=None: -1,
     torch.bilinear: lambda input1, input2, weight, bias: -1,
     torch.binary_cross_entropy_with_logits: (
-        lambda input, target, weight=None, size_average=None, reduce=None, reduction="mean", pos_weight=None, label_smoothing=0.0: -1  # noqa: B950
+        lambda input, target, weight=None, size_average=None, reduce=None, reduction="mean", pos_weight=None: -1
     ),
     torch.bincount: lambda input, weights=None, minlength=0: -1,
     torch.binomial: lambda count, prob, generator=None: -1,
@@ -851,10 +851,10 @@ def get_testing_overrides() -> dict[Callable, Callable]:
     ),
     torch.nn.functional.bilinear: lambda input1, input2, weight, bias=None: -1,
     torch.nn.functional.binary_cross_entropy: (
-        lambda input, target, weight=None, size_average=None, reduce=None, reduction="mean", label_smoothing=0.0: -1
+        lambda input, target, weight=None, size_average=None, reduce=None, reduction="mean": -1
     ),
     torch.nn.functional.binary_cross_entropy_with_logits: (
-        lambda input, target, weight=None, size_average=None, reduce=None, reduction="mean", pos_weight=None, label_smoothing=0.0: -1  # noqa: B950
+        lambda input, target, weight=None, size_average=None, reduce=None, reduction="mean", pos_weight=None: -1
     ),
     torch.nn.functional.celu: lambda input, alpha=1.0, inplace=False: -1,
     torch.nn.functional.cosine_embedding_loss: (
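These lambdas exist only to mirror the public signatures for __torch_function__ override testing, which is why the revert has to touch them as well: a stale entry would fail the signature check. One way to inspect an entry through the public torch.overrides API (a sketch, not part of the diff):

```python
import inspect
import torch

# Map of overridable callables to dummy lambdas with matching signatures.
overrides = torch.overrides.get_testing_overrides()
dummy = overrides[torch.nn.functional.binary_cross_entropy]
# After the revert, the mirrored signature no longer lists label_smoothing.
print(inspect.signature(dummy))
```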
@@ -1463,14 +1463,9 @@ def module_inputs_torch_nn_BCELoss(module_info, device, dtype, requires_grad, tr
         ('reduction_mean', {'reduction': 'mean'}),
         ('reduction_none', {'reduction': 'none'}),
         ('weights', {'weight': make_weight((10,))}),
-        ('label_smoothing', {'label_smoothing': 0.15}),
     ]
 
-    def bce_loss_reference_fn(m, p, i, t, reduction='mean', weight=None, label_smoothing=0.0):
-        assert 0 <= label_smoothing <= 1
-        if label_smoothing > 0:
-            t = t * (1 - label_smoothing) + (1 - t) * label_smoothing
-
+    def bce_loss_reference_fn(m, p, i, t, reduction='mean', weight=None):
         result = -(t * i.log() + (1 - t) * (1 - i).log())
 
         if weight is not None:
@@ -1516,15 +1511,10 @@ def module_inputs_torch_nn_BCEWithLogitsLoss(module_info, device, dtype, require
         ('reduction_mean', {'reduction': 'mean'}),
         ('reduction_none', {'reduction': 'none'}),
         ('weights', {'weight': make_weight((10,))}),
-        ('scalar_weights', {'weight': make_weight(())}),
-        ('label_smoothing', {'label_smoothing': 0.15}),
+        ('scalar_weights', {'weight': make_weight(())})
     ]
 
-    def bce_withlogitsloss_reference_fn(m, p, i, t, reduction='mean', weight=None, label_smoothing=0.0):
-        assert 0 <= label_smoothing <= 1
-        if label_smoothing > 0:
-            t = t * (1 - label_smoothing) + (1 - t) * label_smoothing
-
+    def bce_withlogitsloss_reference_fn(m, p, i, t, reduction='mean', weight=None):
         # TODO: add pos_weight to the definition here and corresponding SampleInputs
         max_val = (-i).clamp(min=0)
         result = (1 - t).mul_(i).add_(max_val).add_((-max_val).exp_().add_((-i - max_val).exp_()).log_())
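The restored reference functions recompute the losses from first principles so the test harness can compare them against the real kernels. The BCE identity they encode can be checked directly against the functional API; this is my own quick check, not part of the test suite:

```python
import torch
import torch.nn.functional as F

i = torch.sigmoid(torch.randn(10))  # probabilities in (0, 1)
t = torch.rand(10)                  # soft targets in [0, 1]

# Elementwise BCE, reduced with the default 'mean', as in the reference fn.
ref = -(t * i.log() + (1 - t) * (1 - i).log()).mean()
assert torch.allclose(ref, F.binary_cross_entropy(i, t))
```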