## @package batch_softmax_loss
# Module caffe2.python.layers.batch_softmax_loss
from caffe2.python import core, schema
from caffe2.python.layers.layers import ModelLayer
import numpy as np


class BatchSoftmaxLoss(ModelLayer):
    """Softmax-with-loss layer over a batch of predictions, with optional
    label smoothing and optional per-example weights."""

    def __init__(
        self,
        model,
        input_record,
        name='batch_softmax_loss',
        label_smoothing_matrix=None,
        label_prob=False,
        scale=1.0,
        average_by_batch_size=False,
        **kwargs
    ):
        super(BatchSoftmaxLoss, self).__init__(
            model, name, input_record, **kwargs)

        assert schema.is_schema_subset(
            schema.Struct(
                ('label', schema.Scalar()),
                ('prediction', schema.Scalar()),
            ),
            input_record
        )
        self.label_prob = label_prob
        self.scale = scale
        self.average_by_batch_size = average_by_batch_size

        # label smoothing matrix: a K x K matrix, where K is the label
        # cardinality; the (i, j) element is the weight given to label j
        # when label i is treated/smoothed as a distribution
        self.label_smoothing_matrix = label_smoothing_matrix
        if self.label_smoothing_matrix is not None:
            self.initialize_label_smoothing_constants()

        self.output_schema = schema.Struct(
            (
                'softmax', schema.Scalar(
                    input_record.prediction.field_type(),
                    self.get_next_blob_reference('softmax')
                )
            ),
            (
                'loss', schema.Scalar(
                    np.float32, self.get_next_blob_reference('loss')
                )
            ),
        )

    def initialize_label_smoothing_constants(self):
        """Validates the smoothing matrix and registers it, together with
        the label cardinality, as global constant blobs."""
        assert self.label_smoothing_matrix is not None
        self.label_smoothing_matrix = np.array(
            self.label_smoothing_matrix).astype(np.float32)
        assert len(self.label_smoothing_matrix.shape) == 2
        label_dim = self.label_smoothing_matrix.shape[0]
        assert label_dim == self.label_smoothing_matrix.shape[1]

        self.label_smoothing_matrix = self.model.add_global_constant(
            '%s_label_smoothing_matrix' % self.name,
            array=self.label_smoothing_matrix,
            dtype=np.dtype(np.float32),
        )
        self.label_dim = self.model.add_global_constant(
            '%s_label_dim' % self.name,
            array=label_dim,
            dtype=np.dtype(np.int64),
        )
        # by default, the label is given NOT as a target distribution;
        # but with label smoothing, the label must be given as probabilities
        self.label_prob = True

    def compute_smoothed_label(self, net):
        """One-hot encodes the integer label and multiplies it by the
        smoothing matrix, yielding a target distribution per example."""
        assert self.label_smoothing_matrix is not None
        label = self.input_record.label()
        original_label_type = self.input_record.label.field_type()
        if original_label_type.base != np.int64:
            int64_label = net.NextScopedBlob('int64_label')
            net.Cast([label], [int64_label], to=core.DataType.INT64)
        else:
            int64_label = label
        one_hot_label = net.NextScopedBlob('one_hot_label')
        smoothed_label = net.NextScopedBlob('smoothed_label')
        net.OneHot([int64_label, self.label_dim], [one_hot_label])
        net.MatMul([one_hot_label, self.label_smoothing_matrix],
                   smoothed_label)
        return smoothed_label

    def add_ops(self, net):
        # cast the label (and optional weight) to the dtypes that
        # SoftmaxWithLoss expects, then emit the op
        label = self.input_record.label.field_blobs()
        if self.label_smoothing_matrix is not None:
            label = [self.compute_smoothed_label(net)]
        elif not self.label_prob:
            if self.input_record.label.field_types()[0].base != np.int32:
                label = [
                    net.Cast(label,
                             net.NextScopedBlob('int32_label'),
                             to=core.DataType.INT32)
                ]

        softmax_input = self.input_record.prediction.field_blobs() + label

        if 'weight' in self.input_record:
            weight_blob = self.input_record.weight()
            if self.input_record.weight.field_type().base != np.float32:
                weight_blob = net.Cast(
                    weight_blob,
                    weight_blob + '_float32',
                    to=core.DataType.FLOAT
                )

            softmax_input += [weight_blob]

        net.SoftmaxWithLoss(
            softmax_input,
            self.output_schema.field_blobs(),
            label_prob=self.label_prob,
            scale=self.scale,
            average_by_batch_size=self.average_by_batch_size,
        )
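

# ---------------------------------------------------------------------------
# Usage sketch (illustrative; not part of the layer implementation).
# Builds a label smoothing matrix for a hypothetical K-class problem:
# weight 1 - eps stays on the true class and eps is spread uniformly over
# the remaining K - 1 classes. The caffe2 wiring shown in the comments is
# a hedged sketch: `model` (a layer model helper) and `input_record` are
# assumed, hypothetical names, not definitions from this module.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    K = 4      # hypothetical label cardinality
    eps = 0.1  # total probability mass moved off the true class
    smoothing = np.full((K, K), eps / (K - 1), dtype=np.float32)
    np.fill_diagonal(smoothing, 1.0 - eps)
    print(smoothing)
    # With a layer model in scope, the layer would be attached roughly as:
    #
    #   loss_record = model.BatchSoftmaxLoss(
    #       input_record,
    #       label_smoothing_matrix=smoothing,
    #   )
    #   # loss_record.softmax: per-example predicted distribution
    #   # loss_record.loss: scalar loss blob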