# @package homotopy_weight
# Module caffe2.fb.python.layers.homotopy_weight

from caffe2.python import core, schema
from caffe2.python.layers.layers import ModelLayer

import numpy as np
import logging

logger = logging.getLogger(__name__)

'''
Homotopy weighting between two inputs x, y by computing:

    alpha * x + beta * y

where alpha is a decreasing scalar parameter in the range [min, max]
(default [0, 1]), and alpha + beta = max + min, so beta is increasing
over the same range [min, max].

Homotopy methods first solve an "easy" problem (one whose solution is well
known), which is then gradually transformed into the target problem.
'''


class HomotopyWeight(ModelLayer):
    def __init__(
        self,
        model,
        input_record,
        name='homotopy_weight',
        min_weight=0.,
        max_weight=1.,
        half_life=1e6,
        quad_life=3e6,
        atomic_iter=None,
        **kwargs
    ):
        super(HomotopyWeight, self).__init__(
            model, name, input_record, **kwargs
        )
        self.output_schema = schema.Scalar(
            np.float32, self.get_next_blob_reference('homotopy_weight')
        )
        data = self.input_record.field_blobs()
        assert len(data) == 2
        self.x = data[0]
        self.y = data[1]
        # TODO: currently model building does not have access to the iter
        # counter or learning rate; they are added at optimization time
        self.use_external_iter = (atomic_iter is not None)
        self.atomic_iter = (
            atomic_iter if self.use_external_iter else self.create_atomic_iter()
        )
        # map lr to [min, max]: alpha = scale * lr + offset
        assert max_weight > min_weight
        self.scale = float(max_weight - min_weight)
        self.offset = self.model.add_global_constant(
            '%s_offset_1dfloat' % self.name, float(min_weight)
        )
        self.gamma, self.power = self.solve_inv_lr_params(half_life, quad_life)

    def solve_inv_lr_params(self, half_life, quad_life):
        # ensure that gamma and power are solvable
        assert half_life > 0
        # convex monotonically decreasing
        assert quad_life > 2 * half_life
        t = float(quad_life) / float(half_life)
        x = t * (1.0 + np.sqrt(2.0)) / 2.0 - np.sqrt(2.0)
        gamma = (x - 1.0) / float(half_life)
        power = np.log(2.0) / np.log(x)
        logger.info(
            'homotopy_weighting: found lr param: gamma=%g, power=%g' %
            (gamma, power)
        )
        return gamma, power

    def create_atomic_iter(self):
        self.mutex = self.create_param(
            param_name=('%s_mutex' % self.name),
            shape=None,
            initializer=('CreateMutex', ),
            optimizer=self.model.NoOptim,
        )
        self.atomic_iter = self.create_param(
            param_name=('%s_atomic_iter' % self.name),
            shape=[1],
            initializer=(
                'ConstantFill', {
                    'value': 0,
                    'dtype': core.DataType.INT64
                }
            ),
            optimizer=self.model.NoOptim,
        )
        return self.atomic_iter

    def update_weight(self, net):
        alpha = net.NextScopedBlob('alpha')
        beta = net.NextScopedBlob('beta')
        lr = net.NextScopedBlob('lr')
        comp_lr = net.NextScopedBlob('complementary_lr')
        scaled_lr = net.NextScopedBlob('scaled_lr')
        scaled_comp_lr = net.NextScopedBlob('scaled_complementary_lr')
        if not self.use_external_iter:
            net.AtomicIter([self.mutex, self.atomic_iter], [self.atomic_iter])
        net.LearningRate(
            [self.atomic_iter],
            [lr],
            policy='inv',
            gamma=self.gamma,
            power=self.power,
            base_lr=1.0,
        )
        net.Sub([self.model.global_constants['ONE'], lr], [comp_lr])
        net.Scale([lr], [scaled_lr], scale=self.scale)
        net.Scale([comp_lr], [scaled_comp_lr], scale=self.scale)
        net.Add([scaled_lr, self.offset], [alpha])
        net.Add([scaled_comp_lr, self.offset], [beta])
        return alpha, beta

    def add_ops(self, net):
        alpha, beta = self.update_weight(net)
        # alpha x + beta y
        net.WeightedSum([self.x, alpha, self.y, beta], self.output_schema())
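

# Illustrative sketch only, not part of the layer: it mirrors
# solve_inv_lr_params() and the alpha/beta computation from update_weight()
# in plain numpy, assuming caffe2's 'inv' LearningRate policy evaluates to
# lr = base_lr * (1 + gamma * iter) ** (-power). It shows how alpha decays
# from max_weight toward min_weight (halving by half_life and falling to
# roughly a quarter by quad_life) while beta grows correspondingly.
if __name__ == '__main__':
    half_life, quad_life = 1e6, 3e6
    min_weight, max_weight = 0.0, 1.0

    # same closed form as HomotopyWeight.solve_inv_lr_params
    t = quad_life / half_life
    x = t * (1.0 + np.sqrt(2.0)) / 2.0 - np.sqrt(2.0)
    gamma = (x - 1.0) / half_life
    power = np.log(2.0) / np.log(x)

    scale = max_weight - min_weight
    offset = min_weight
    for it in [0, half_life, quad_life, 10 * quad_life]:
        lr = (1.0 + gamma * it) ** (-power)  # assumed 'inv' policy curve
        alpha = scale * lr + offset          # weight on x, decreasing
        beta = scale * (1.0 - lr) + offset   # weight on y, increasing
        print(
            'iter=%.0e  lr=%.4f  alpha=%.4f  beta=%.4f' % (it, lr, alpha, beta)
        )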