Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00
Summary: The `2to3` tool has a `future` fixer that can be targeted specifically to remove these redundant imports; the `caffe2` directory has the most of them:

```2to3 -f future -w caffe2```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/45033
Reviewed By: seemethere
Differential Revision: D23808648
Pulled By: bugra
fbshipit-source-id: 38971900f0fe43ab44a9168e57f2307580d36a38
125 lines
4.2 KiB
Python
# @package homotopy_weight
# Module caffe2.fb.python.layers.homotopy_weight

from caffe2.python import core, schema
from caffe2.python.layers.layers import ModelLayer
import numpy as np
import logging

logger = logging.getLogger(__name__)
'''
Homotopy weighting between two weights x, y by doing:
    alpha x + beta y
where alpha is a decreasing scalar parameter ranging over [min, max] (default
[0, 1]), and alpha + beta = max + min, which means that beta is increasing over
the range [min, max].

Homotopy methods first solve an "easy" problem (one to which the solution is
well known), which is then gradually transformed into the target problem.
'''

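# Illustrative note, assuming Caffe2's 'inv' learning-rate policy computes
# lr(iter) = base_lr * (1 + gamma * iter) ** (-power): with base_lr = 1, lr
# decays from 1 toward 0, so alpha = scale * lr + min_weight decays from
# max_weight toward min_weight while beta = scale * (1 - lr) + min_weight grows
# from min_weight toward max_weight; alpha + beta == max_weight + min_weight at
# every iteration, shifting the blend gradually from x to y.
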
class HomotopyWeight(ModelLayer):
    def __init__(
        self,
        model,
        input_record,
        name='homotopy_weight',
        min_weight=0.,
        max_weight=1.,
        half_life=1e6,
        quad_life=3e6,
        atomic_iter=None,
        **kwargs
    ):
        super(HomotopyWeight,
              self).__init__(model, name, input_record, **kwargs)
        self.output_schema = schema.Scalar(
            np.float32, self.get_next_blob_reference('homotopy_weight')
        )
        data = self.input_record.field_blobs()
        assert len(data) == 2
        self.x = data[0]
        self.y = data[1]
        # TODO: currently model building does not have access to iter counter or
        # learning rate; it's added at optimization time;
        self.use_external_iter = (atomic_iter is not None)
        self.atomic_iter = (
            atomic_iter if self.use_external_iter else self.create_atomic_iter()
        )
        # to map lr to [min, max]; alpha = scale * lr + offset
        assert max_weight > min_weight
        self.scale = float(max_weight - min_weight)
        self.offset = self.model.add_global_constant(
            '%s_offset_1dfloat' % self.name, float(min_weight)
        )
        self.gamma, self.power = self.solve_inv_lr_params(half_life, quad_life)

    def solve_inv_lr_params(self, half_life, quad_life):
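        # Sketch of the reasoning, assuming the 'inv' policy
        # lr(iter) = (1 + gamma * iter) ** (-power) with base_lr = 1: choosing
        # gamma = (x - 1) / half_life and power = log(2) / log(x) gives
        # lr(half_life) = x ** (-log(2) / log(x)) = 1 / 2, so the schedule is
        # halfway decayed after `half_life` iterations; quad_life controls how
        # quickly it keeps decaying beyond that point.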
        # ensure that the gamma, power is solvable
        assert half_life > 0
        # convex monotonically decreasing
        assert quad_life > 2 * half_life
        t = float(quad_life) / float(half_life)
        x = t * (1.0 + np.sqrt(2.0)) / 2.0 - np.sqrt(2.0)
        gamma = (x - 1.0) / float(half_life)
        power = np.log(2.0) / np.log(x)
        logger.info(
            'homotopy_weighting: found lr param: gamma=%g, power=%g' %
            (gamma, power)
        )
        return gamma, power

    def create_atomic_iter(self):
        self.mutex = self.create_param(
            param_name=('%s_mutex' % self.name),
            shape=None,
            initializer=('CreateMutex', ),
            optimizer=self.model.NoOptim,
        )
        self.atomic_iter = self.create_param(
            param_name=('%s_atomic_iter' % self.name),
            shape=[1],
            initializer=(
                'ConstantFill', {
                    'value': 0,
                    'dtype': core.DataType.INT64
                }
            ),
            optimizer=self.model.NoOptim,
        )
        return self.atomic_iter

    def update_weight(self, net):
        alpha = net.NextScopedBlob('alpha')
        beta = net.NextScopedBlob('beta')
        lr = net.NextScopedBlob('lr')
        comp_lr = net.NextScopedBlob('complementary_lr')
        scaled_lr = net.NextScopedBlob('scaled_lr')
        scaled_comp_lr = net.NextScopedBlob('scaled_complementary_lr')
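        # only bump the built-in counter when no external iteration counter was
        # supplied; LearningRate then turns the counter into a decaying lr in
        # (0, 1] using the 'inv' policy solved for in solve_inv_lr_params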
        if not self.use_external_iter:
            net.AtomicIter([self.mutex, self.atomic_iter], [self.atomic_iter])
        net.LearningRate(
            [self.atomic_iter],
            [lr],
            policy='inv',
            gamma=self.gamma,
            power=self.power,
            base_lr=1.0,
        )
        net.Sub([self.model.global_constants['ONE'], lr], [comp_lr])
        net.Scale([lr], [scaled_lr], scale=self.scale)
        net.Scale([comp_lr], [scaled_comp_lr], scale=self.scale)
        net.Add([scaled_lr, self.offset], [alpha])
        net.Add([scaled_comp_lr, self.offset], [beta])
        return alpha, beta

    def add_ops(self, net):
        alpha, beta = self.update_weight(net)
        # alpha x + beta y
        net.WeightedSum([self.x, alpha, self.y, beta], self.output_schema())