Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-07 12:21:27 +01:00
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/29707

In D17885977, Linearizable label (a multi-class classification) was implemented in MTML. In this diff, we add several items for Linearizable label:

- Assigning different weights to each class through ```model_def.tasks[i].class_weights```.
  - This option is a dictionary whose keys are the indices of the classes and whose values are the weights for each class.
  - For example, if a linearizable-label task has 4 classes and its ```class_weights = {"0": 1, "1": 0.1, "2": 0.1, "3": 0.01}```, then in the loss function of this task we assign weight 1 to its first class, weight 0.1 to its second and third classes, and weight 0.01 to its fourth class. The index/order of classes follows the logic of linearizable label.
  - Note that when you assign different weights to different classes, you need to correct the calibration by setting an appropriate ```model_def.tasks[i].calibration.linearizable_class_weight```. The class weights in calibration should be the reciprocals of the class weights in the loss function, so ```calibration.linearizable_class_weight = {"0": 1, "1": 10, "2": 10, "3": 100}``` for the example above (see the sketch after this summary).
  - Example FBLearner job: f150763093
- We also support ```model_def.allow_missing_label_with_zero_weight``` for linearizable label, which ignores examples whose first label is missing by assigning them zero weight in the loss function.
  - Set ```allow_missing_label_with_zero_weight = true``` to enable it.
  - Example FBLearner job: f150763093
- Last but not least, we update the caffe2 operator ```SoftmaxWithLoss``` to support loss averaged by batch size.
  - Set ```model_def.tasks[i].loss.softmaxLoss.average_by_batch_size = true``` to enable it.
  - Previously, the loss was averaged by the weight sum of the examples in the batch, which is still the default behavior (when ```average_by_batch_size = null``` or ```average_by_batch_size = false```).
  - Without this new feature, the calibration will be incorrect when applying non-equal-weight training among different classes to a linearizable task (see the normalization sketch after the header below).
  - Example FBLearner job with ```average_by_batch_size = true```, which results in a correct calibration: f150763093
  - Example FBLearner job with ```average_by_batch_size = null```, which results in an incorrect calibration: f150762990

Test Plan:
buck test caffe2/caffe2/fb/dper/layer_models/tests:mtml_test_2 -- test_linearizable_label_task_with_class_weights
buck test caffe2/caffe2/fb/dper/layer_models/tests:mtml_test_2 -- test_linearizable_label_task_with_zero_weight
buck test caffe2/caffe2/fb/dper/layer_models/tests:mtml_test_2 -- test_linearizable_label_task_average_by_batch_size

All tests passed.

Full canary: https://fburl.com/fblearner/troznfgh

Reviewed By: chenshouyuan

Differential Revision: D18461163

fbshipit-source-id: aaf3df031406ae94f74e2e365b57e47409ef0bfe
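To make the relationship between the per-class loss weights and the reciprocal calibration weights concrete, here is a minimal standalone C++ sketch. It assumes the per-class weight simply scales each example's cross-entropy by the weight of its label's class; the names are illustrative and this is not dper or caffe2 code.

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Weighted multi-class cross-entropy for one example (standalone sketch).
// `probs` are softmax outputs, `label` is the class index, and `class_weights`
// mirrors model_def.tasks[i].class_weights from the summary above.
float weighted_xent(
    const std::vector<float>& probs,
    int label,
    const std::vector<float>& class_weights) {
  return -class_weights[label] * std::log(probs[label]);
}

int main() {
  // Loss weights from the example: {"0": 1, "1": 0.1, "2": 0.1, "3": 0.01}.
  std::vector<float> class_weights = {1.f, 0.1f, 0.1f, 0.01f};

  // Calibration weights are their reciprocals, matching
  // calibration.linearizable_class_weight = {"0": 1, "1": 10, "2": 10, "3": 100}.
  std::vector<float> calibration_weights(class_weights.size());
  for (std::size_t c = 0; c < class_weights.size(); ++c) {
    calibration_weights[c] = 1.f / class_weights[c];
  }

  std::vector<float> probs = {0.7f, 0.1f, 0.1f, 0.1f};
  std::printf("loss for label 3: %f\n", weighted_xent(probs, 3, class_weights));
  std::printf("calibration weight for class 3: %f\n", calibration_weights[3]);
  return 0;
}
```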
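The ```allow_missing_label_with_zero_weight``` behavior can be pictured the same way: examples whose first label is missing receive weight 0 and therefore drop out of the weighted loss sum. Below is a minimal sketch, assuming a sentinel value for a missing label; the sentinel and names are illustrative, not dper's actual encoding.

```cpp
#include <cstdio>
#include <vector>

// Sketch: per-example loss weights where examples with a missing (first) label
// get weight 0, so they contribute nothing to the weighted loss sum.
// kMissingLabel is an illustrative sentinel, not dper's actual encoding.
constexpr int kMissingLabel = -1;

std::vector<float> make_example_weights(const std::vector<int>& labels) {
  std::vector<float> weights(labels.size(), 1.f);
  for (std::size_t i = 0; i < labels.size(); ++i) {
    if (labels[i] == kMissingLabel) {
      weights[i] = 0.f; // ignored by the weighted loss
    }
  }
  return weights;
}

int main() {
  std::vector<int> labels = {2, kMissingLabel, 0};
  std::vector<float> weights = make_example_weights(labels);
  for (std::size_t i = 0; i < weights.size(); ++i) {
    std::printf("example %zu weight: %.1f\n", i, weights[i]);
  }
  return 0;
}
```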
#ifndef SOFTMAX_WITH_LOSS_OP_H_
#define SOFTMAX_WITH_LOSS_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

template <typename T, class Context>
class SoftmaxWithLossOp final : public Operator<Context> {
 public:
  template <class... Args>
  explicit SoftmaxWithLossOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        scale_(this->template GetSingleArgument<float>("scale", 1.)),
        label_prob_mode_(
            this->template GetSingleArgument<int>("label_prob", 0)),
        average_by_batch_size_(
            this->template GetSingleArgument<int>("average_by_batch_size", 0)),
        order_(StringToStorageOrder(
            this->template GetSingleArgument<string>("order", "NCHW"))),
        axis_(this->template GetSingleArgument<int>("axis", 1)) {
    CAFFE_ENFORCE(scale_ >= 0);
    CAFFE_ENFORCE_EQ(
        order_, StorageOrder::NCHW, "Only NCHW order is supported right now.");
  }
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override;

 protected:
  float scale_;
  int label_prob_mode_;
  int average_by_batch_size_;
  StorageOrder order_;
  int axis_;

  Tensor losses_; // Per example loss
  Tensor rowmax_; // per example row max
  Tensor weights_; // unignored weights
  Tensor sum_multiplier_; // Vector of ones for summing via dot prod
  Tensor total_weight_ptr_;
  // passed to a function
  Tensor scratch_{Context::GetDeviceType()};
};

template <typename T, class Context>
class SoftmaxWithLossGradientOp final : public Operator<Context> {
 public:
  template <class... Args>
  explicit SoftmaxWithLossGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        scale_(this->template GetSingleArgument<float>("scale", 1.)),
        label_prob_mode_(
            this->template GetSingleArgument<int>("label_prob", 0)),
        average_by_batch_size_(
            this->template GetSingleArgument<int>("average_by_batch_size", 0)),
        order_(StringToStorageOrder(
            this->template GetSingleArgument<string>("order", "NCHW"))),
        only_loss_(this->template GetSingleArgument<bool>("only_loss", false)),
        axis_(this->template GetSingleArgument<int>("axis", 1)) {
    CAFFE_ENFORCE(scale_ >= 0);
    CAFFE_ENFORCE_EQ(
        order_, StorageOrder::NCHW, "Only NCHW order is supported right now.");
  }
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override;

 protected:
  float scale_;
  int label_prob_mode_;
  int average_by_batch_size_;
  // not used?
  Tensor sum_multiplier_{Context::GetDeviceType()};
  Tensor weights_; // unignored weights
  Tensor total_weight_ptr_;
  StorageOrder order_;
  bool only_loss_;
  int axis_;
  Tensor scratch_{Context::GetDeviceType()};
};

} // namespace caffe2

#endif // SOFTMAX_WITH_LOSS_OP_H_
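The ```average_by_batch_size``` argument read in both constructors above selects how the summed, weighted loss is normalized: by the weight sum of the examples in the batch (the default) or by the batch size. The following standalone sketch illustrates that choice as described in the summary; it is not the actual ```RunOnDevice``` implementation, and the names are illustrative.

```cpp
#include <cstdio>
#include <vector>

// Sketch of the two normalization modes for the summed, weighted loss.
// average_by_batch_size == false (default): divide by the sum of example weights.
// average_by_batch_size == true: divide by the batch size N.
float normalize_loss(
    const std::vector<float>& per_example_loss, // already weighted
    const std::vector<float>& example_weights,
    bool average_by_batch_size,
    float scale = 1.f) {
  float loss_sum = 0.f;
  float weight_sum = 0.f;
  for (std::size_t i = 0; i < per_example_loss.size(); ++i) {
    loss_sum += per_example_loss[i];
    weight_sum += example_weights[i];
  }
  const float denom = average_by_batch_size
      ? static_cast<float>(per_example_loss.size())
      : weight_sum;
  return scale * loss_sum / denom;
}

int main() {
  std::vector<float> losses = {0.5f, 0.02f, 0.03f}; // weighted per-example losses
  std::vector<float> weights = {1.f, 0.1f, 0.1f};   // per-example weights
  std::printf("avg by weight sum: %f\n", normalize_loss(losses, weights, false));
  std::printf("avg by batch size: %f\n", normalize_loss(losses, weights, true));
  return 0;
}
```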