mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
Summary: In order to select the more important features in the dot product among a list of candidate sparse features, we can assign one learnable weight to each feature and reweight each feature by multiplying the weight onto its embedding before the dot product. We finally select features based on the weight magnitude after training. We can perform L1 and/or L2 regularization on the weights. To summarize, the weights tend to shrink their values (avoiding overfitting) due to L2 regularization, and some weights will vanish to zero due to L1. To avoid sparse feature embeddings being ignored due to early collapse of weights, a piecewise lr warm-up policy is used in optimizing the regularization term, such that regularization is weak at the first stage and gets stronger afterwards (a small lr constant in iters less than threshold 1, a medium lr constant in stage 2, and a final reasonably large lr constant in all iters after threshold 2). The features with nonzero and relatively large weights (in absolute value) will be selected for the module. We can also apply softmax on the original weights to make them sum to 1. We can even boost the softmaxed weights by multiplying them by the number of softmax components, which essentially makes them sum to the number of softmax components and average to 1. In this idea, all the weights are positive and sum to a constant. Regularization is not a must since we can count on the competition between the softmax weights themselves to achieve reasonable re-weighting. We expect those weights to be denser compared with the sparse ones from L1 regularization, and we can select features based on the top K weights. Overall, we aim to demonstrate that the selected feature set outperforms the current v0 feature set in experiments. Special acknowledgement goes to Shouyuan Chen, who initiated the work of regularizable weighting. --- Pull Request resolved: https://github.com/pytorch/pytorch/pull/22176 The diff will export updates to Github repository, as stated below. 
{F162787228} Basically, the updates on the files are summarized as below: - adding logger messages `caffe2/python/layer_model_helper.py` - add ElasticNet regularizer, which combines both L1 and L2 regularization `caffe2/python/regularizer.py` - implement piecewarmup, specifically warm up with three constant pieces `caffe2/sgd/learning_rate_functors.h, caffe2/sgd/learning_rate_op.cc, caffe2/sgd/learning_rate_op.h` Differential Revision: D15923430 fbshipit-source-id: ee18902cb88c23b1b7b367cc727d690a21e4cda9
737 lines
28 KiB
Python
737 lines
28 KiB
Python
# @package layer_model_helper
|
|
# Module caffe2.python.layer_model_helper
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
from caffe2.python import core, model_helper, schema, scope, utils, muji
|
|
from caffe2.python.modeling.parameter_info import (
|
|
ParameterInfo,
|
|
)
|
|
from caffe2.python.modeling.parameter_sharing import (
|
|
parameter_sharing_context,
|
|
)
|
|
from caffe2.python.modeling.net_modifier import NetModifier
|
|
|
|
from caffe2.python.optimizer import get_param_device, Optimizer
|
|
from caffe2.python.regularizer import Regularizer, RegularizationBy
|
|
from caffe2.python.layers import layers
|
|
from caffe2.proto import caffe2_pb2
|
|
from future.utils import viewitems, viewvalues
|
|
|
|
import logging
|
|
import numpy as np
|
|
import six
|
|
import copy
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class LayerModelHelper(model_helper.ModelHelper):
|
|
"""
|
|
Model helper for building models on top of layers abstractions.
|
|
|
|
Each layer is the abstraction that is higher level than Operator. Layer
|
|
is responsible for ownership of its own parameters and can easily be
|
|
instantiated in multiple nets possible with different sets of ops.
|
|
As an example: one can easily instantiate predict and train nets from
|
|
the same set of layers, where predict net will have subset of the
|
|
operators from train net.
|
|
"""
|
|
|
|
def __init__(self, name, input_feature_schema, trainer_extra_schema,
             keep_blobs=False):
    """Set up an empty layer-based model.

    Args:
        name: passed to the parent ModelHelper constructor as ``name``.
        input_feature_schema: schema of the model inputs.
        trainer_extra_schema: schema of the extra trainer inputs.
        keep_blobs: when true both schemas are cloned as they are;
            otherwise new records are created for them in ``self.net``.

    TODO(amalevich): more documentation on input args
    """
    super(LayerModelHelper, self).__init__(name=name)

    # layer / parameter bookkeeping
    self._layers = []
    self._layer_names = set()
    self._param_to_shape = {}

    # seed default
    self._seed = None
    self._sequence_seed = True

    # optimizer bookkeeping
    self.param_to_optim = {}
    self.param_to_reg = {}
    self._default_optimizer = None

    # model outputs
    self._loss = None
    self._prediction = []
    self._output_schema = None
    self._preproc_output_schema = None
    self._metrics_schema = schema.Struct()

    # net modifiers, applied later by the apply_*_net_modifiers methods
    self._post_grad_net_modifiers = []
    self._final_net_modifiers = []

    # breakdown map; breakdown features are categorical (like dense) but not
    # necessarily used to represent data for training
    self._breakdown_map = None

    # Connect the schemas to self.net. These particular schema instances
    # are used to generate the layers across the network and to connect
    # the network with Readers.
    if keep_blobs:
        self._input_feature_schema = input_feature_schema.clone()
        self._trainer_extra_schema = trainer_extra_schema.clone()
    else:
        self._input_feature_schema = schema.NewRecord(
            self.net, input_feature_schema)
        self._trainer_extra_schema = schema.NewRecord(
            self.net, trainer_extra_schema)

    # Global constants have to be registered before the init net is
    # created, because create_init_net copies their initializers into it.
    self._init_global_constants()
    self.param_init_net = self.create_init_net('param_init_net')
    self._initialize_params = True

    # additional (hard-coded) diagnose_options to report based on the model
    # TODO(xlwang): it's hack!
    self.ad_hoc_diagnose_blobs_and_operations = []
    self.ad_hoc_plot_blobs = []
|
|
|
|
def clear_output_schema(self):
|
|
self._output_schema = None
|
|
|
|
def set_initialize_params(self, initialize_params):
|
|
self._initialize_params = initialize_params
|
|
|
|
def add_metric_field(self, name, value):
    """Append the field ``(name, value)`` to the metrics schema.

    Asserts that no field called ``name`` is already present.
    """
    already_present = name in self._metrics_schema.fields
    assert not already_present, (
        "Try to add metric field twice: {}".format(name))
    extra_field = schema.Struct((name, value))
    self._metrics_schema = self._metrics_schema + extra_field
|
|
|
|
# an empty white_set will skip everything
def filter_metrics_schema(self, white_set):
    """Remove every metric field whose name is not in ``white_set``."""
    logger.info("Filter metric schema with white_set {}".format(white_set))
    # Take the names up front: the schema is rebound inside the loop.
    all_names = self._metrics_schema.field_names()
    for name in all_names:
        if name in white_set:
            continue
        dropped = schema.Struct((name, schema.Scalar()))
        self._metrics_schema = self._metrics_schema - dropped
|
|
|
|
def add_ad_hoc_plot_blob(self, blob, dtype=None):
    """Register ``blob`` as a metric field and as an ad-hoc plot blob.

    Args:
        blob: a string or a core.BlobReference; anything else fails the
            assertion.
        dtype: dtype handed to schema.Scalar; when falsy, a float with
            shape ``(1, )`` is used.
    """
    assert isinstance(
        blob, (six.string_types, core.BlobReference)
    ), "expect type str or BlobReference, but got {}".format(type(blob))
    # The builtin float is used instead of np.float: np.float was only an
    # alias of the builtin and no longer exists in NumPy >= 1.24.
    dtype = dtype or (float, (1, ))
    self.add_metric_field(str(blob), schema.Scalar(dtype, blob))
    self.ad_hoc_plot_blobs.append(blob)
|
|
|
|
@staticmethod
|
|
def _get_global_constant_initializer_op(
|
|
blob_name, array=None, dtype=None, initializer=None
|
|
):
|
|
# to add a global constant to model, one first need to get the
|
|
# initializer
|
|
if array is not None:
|
|
assert initializer is None,\
|
|
"Only one from array and initializer should be specified"
|
|
if dtype is None:
|
|
array = np.array(array)
|
|
else:
|
|
array = np.array(array, dtype=dtype)
|
|
|
|
# TODO: make GivenTensor generic
|
|
op_name = None
|
|
if array.dtype == np.int32:
|
|
op_name = 'GivenTensorIntFill'
|
|
elif array.dtype == np.int64:
|
|
op_name = 'GivenTensorInt64Fill'
|
|
elif array.dtype == np.str:
|
|
op_name = 'GivenTensorStringFill'
|
|
elif array.dtype == np.bool:
|
|
op_name = 'GivenTensorBoolFill'
|
|
else:
|
|
op_name = 'GivenTensorFill'
|
|
|
|
def initializer(blob_name):
|
|
return core.CreateOperator(
|
|
op_name, [],
|
|
blob_name,
|
|
shape=array.shape,
|
|
values=array.flatten().tolist()
|
|
)
|
|
else:
|
|
assert initializer is not None
|
|
initializer_op = initializer(blob_name)
|
|
return initializer_op
|
|
|
|
def add_global_constant(
    self, name, array=None, dtype=None, initializer=None
):
    """Register a new global constant and return its blob.

    The blob is recorded in ``self.global_constants`` under ``name`` and
    its initializer operator in ``self.global_constant_initializers``.
    Asserts that ``name`` is a string that has not been registered yet.
    """
    assert isinstance(name, six.string_types), (
        'name should be a string as we are using it as map key')
    # This is global namescope for constants. They will be created in all
    # init_nets and there should be very few of them.
    assert name not in self.global_constants, \
        "%s already added in global_constants" % name

    constant_blob = self.net.NextBlob(name)
    self.global_constants[name] = constant_blob

    fill_op = LayerModelHelper._get_global_constant_initializer_op(
        constant_blob, array, dtype, initializer
    )
    assert constant_blob not in self.global_constant_initializers, \
        "there is already a initializer op associated with blob %s" % \
        constant_blob
    self.global_constant_initializers[constant_blob] = fill_op
    return constant_blob
|
|
|
|
def maybe_add_global_constant(self, name, *args, **kwargs):
    """Add a global constant unless one called ``name`` already exists.

    When ``name`` is not registered yet this behaves like
    add_global_constant. When it is registered, the initializer built
    from the given arguments is compared against the stored one and an
    AssertionError is raised if they differ; otherwise the existing blob
    is returned and nothing is added.

    Args:
        name: key of the constant in ``self.global_constants``.
        *args, **kwargs: forwarded as the array / dtype / initializer
            arguments.

    Returns:
        The blob registered for ``name``.
    """
    if name not in self.global_constants:
        return self.add_global_constant(name, *args, **kwargs)

    blob_name = self.global_constants[name]
    initializer_op = \
        LayerModelHelper._get_global_constant_initializer_op(
            blob_name, *args, **kwargs
        )
    previous_op = self.global_constant_initializers[blob_name]
    # check if the original initializer is the same as the one intended
    # now
    assert utils.OpAlmostEqual(
        initializer_op,
        previous_op,
        'debug_info'
    ), \
        "conflict initializers for global constant %s, " \
        "previous %s, now %s" % (
            # stored op first, requested op second, matching the labels
            blob_name, str(previous_op), str(initializer_op))
    return blob_name
|
|
|
|
def _init_global_constants(self):
    """Reset the global constant registries and add the three built-in
    constants ONE, ZERO and ZERO_RANGE."""
    self.global_constants = {}
    self.global_constant_initializers = {}
    builtin_constants = (
        ('ONE', 1.0, None),
        ('ZERO', 0.0, None),
        ('ZERO_RANGE', [0, 0], 'int32'),
    )
    for const_name, const_value, const_dtype in builtin_constants:
        self.add_global_constant(const_name, const_value, dtype=const_dtype)
|
|
|
|
def _add_global_constants(self, init_net):
    """Append every registered global constant initializer operator to
    the operator list of ``init_net``."""
    pending_ops = list(viewvalues(self.global_constant_initializers))
    init_net._net.op.extend(pending_ops)
|
|
|
|
def create_init_net(self, name):
    """Return a new core.Net called ``name`` that already contains the
    global constant initializers."""
    fresh_net = core.Net(name)
    self._add_global_constants(fresh_net)
    return fresh_net
|
|
|
|
def _validate_param_shape(self, param_name, shape):
|
|
if param_name not in self._param_to_shape:
|
|
return
|
|
|
|
ref_shape = self._param_to_shape[param_name]
|
|
|
|
if shape != ref_shape:
|
|
raise ValueError(
|
|
"Got inconsistent shapes between shared parameters "
|
|
"when trying to map a blob in scope {0} to {1}. ref_shape : "
|
|
" {2}, shape : {3}".format(
|
|
scope.CurrentNameScope(), param_name, ref_shape, shape)
|
|
)
|
|
|
|
def _validate_param_optim(self, param_name, optim):
|
|
# there are three possible values for optim:
|
|
# 1) None (which will use self._default_optimizer after this layer is instantiated)
|
|
# 2) self.NoOptim
|
|
# 3) an instance of Optimizer class such as AdagradOptimizer
|
|
|
|
# this implies this parameter is not shared with any other parameter so far
|
|
if param_name not in self.param_to_optim:
|
|
return
|
|
|
|
logger.info("{} shares the same parameter with another parameter. "
|
|
"Validating if the same optimizer has been specified for them.".format(
|
|
param_name,
|
|
))
|
|
|
|
ref_optim = self.param_to_optim[param_name]
|
|
|
|
if optim is None:
|
|
assert ref_optim == self._default_optimizer, (
|
|
"Optim for {} is None which will fall back to use default_optimizer. "
|
|
"However, the optimizer that has been specified for this shared parameter "
|
|
"is {} which is different from default_optimizer {}. "
|
|
"Please check the optimizers specified for parameters shared "
|
|
"with {} and the default_optimizer to ensure the consistency.".format(
|
|
param_name, ref_optim, self._default_optimizer, param_name
|
|
)
|
|
)
|
|
elif optim == self.NoOptim:
|
|
assert ref_optim == self.NoOptim, (
|
|
"Optim for {} is NoOptim. However, the optimizer for the parameters "
|
|
"shared with {} is {} which is different from NoOptim. "
|
|
"Please check the optimizer specified for other parameters in the "
|
|
"shared group to ensure consistency.".format(
|
|
param_name, param_name, ref_optim
|
|
)
|
|
)
|
|
elif isinstance(optim, Optimizer):
|
|
assert isinstance(ref_optim, Optimizer), (
|
|
"Optim for {} is an instance of Optimizer. However, the optimizer "
|
|
"for the parameters shared with {} is {} which is not an instance "
|
|
"of Optimizer. Please check the optimizer specified for other "
|
|
" parameters in the shared group to ensure consistency.".format(
|
|
param_name, param_name, ref_optim, optim
|
|
)
|
|
)
|
|
|
|
assert type(optim) is type(ref_optim) and optim.attributes == ref_optim.attributes, (
|
|
"Optim for {} is an instance of Optimizer. However, the optimizer "
|
|
"for the parameters shared with {} is {}. "
|
|
"This optimizer either doesn't have the same type as the current optimizer: "
|
|
"{} vs {}, or its attributes such as learning rate are different from "
|
|
"that of current optimizer which is {} vs {}. "
|
|
"Please check the optimizer specified for other parameters in the "
|
|
"shared group to ensure consistency.".format(
|
|
param_name, param_name, ref_optim, type(optim), type(ref_optim), optim.attributes, ref_optim.attributes
|
|
)
|
|
)
|
|
else:
|
|
raise ValueError("optim should be either None, NoOptim, or an instance of Optimizer, Got {} ".format(optim))
|
|
|
|
def create_param(self, param_name, shape, initializer, optimizer=None,
                 ps_param=None, regularizer=None):
    """Create a layers.LayerParameter and record its shape.

    Args:
        param_name: a core.BlobReference (used as is) or a string (resolved
            through parameter_sharing_context).
        shape: shape of the parameter; added to the initializer arguments
            as ``shape`` unless it is None.
        initializer: sequence of one or two items: the initializer operator
            type, optionally followed by a dict of operator arguments.
        optimizer, ps_param, regularizer: stored on the LayerParameter.

    Returns:
        The new layers.LayerParameter. Its initializer is None when
        ``self._initialize_params`` is false.

    Raises:
        ValueError: for an unsupported ``param_name`` type, or when the
            shape conflicts with the one recorded for a shared parameter.
    """
    if isinstance(param_name, core.BlobReference):
        param_name = str(param_name)
    elif isinstance(param_name, six.string_types):
        # Parameter name will be equal to current Namescope that got
        # resolved with the respect of parameter sharing of the scopes.
        param_name = parameter_sharing_context.get_parameter_name(
            param_name)
    else:
        raise ValueError("Unsupported type for param_name")

    param_blob = core.BlobReference(param_name)

    if len(initializer) == 1:
        op_kwargs = {}
    else:
        assert len(initializer) == 2
        # deep copy so the caller's argument dict is never modified
        op_kwargs = copy.deepcopy(initializer[1])
    if shape is not None:
        assert 'shape' not in op_kwargs
        op_kwargs['shape'] = shape

    if self._initialize_params:
        init_op = core.CreateOperator(
            initializer[0], [], param_blob, **op_kwargs)
    else:
        init_op = None

    param = layers.LayerParameter(
        parameter=param_blob,
        initializer=init_op,
        optimizer=optimizer,
        ps_param=ps_param,
        regularizer=regularizer
    )

    self._validate_param_shape(param_name, shape)
    self._validate_param_optim(param_name, optimizer)
    self._param_to_shape[param_name] = shape

    return param
|
|
|
|
def next_layer_name(self, prefix):
    """Reserve and return an unused layer name derived from ``prefix``.

    The scoped prefix is used as is when it is free; otherwise
    ``_auto_<index>`` is appended, with the index counting up from 0 until
    the name is unused.
    """
    taken = self._layer_names
    scoped = core.ScopedName(prefix)

    candidate = scoped
    attempt = 0
    while candidate in taken:
        candidate = scoped + '_auto_' + str(attempt)
        attempt += 1

    taken.add(candidate)
    return candidate
|
|
|
|
def add_layer(self, layer):
    """Register ``layer`` with the model and add its operators.

    Every parameter of the layer is recorded with its optimizer (or the
    current default optimizer) and appended to ``self.params``. For
    LayerParameter objects the regularizer is recorded as well.

    Returns:
        ``layer.output_schema``.

    Raises:
        ValueError: for a parameter that is neither a LayerParameter nor a
            ParameterInfo.
    """
    self._layers.append(layer)
    for param in layer.get_parameters():
        blob = param.parameter
        assert isinstance(blob, core.BlobReference)

        self.param_to_optim[str(blob)] = \
            param.optimizer or self.default_optimizer
        self.params.append(blob)

        if isinstance(param, layers.LayerParameter):
            logger.info("Add parameter regularizer {0}".format(blob))
            self.param_to_reg[blob] = param.regularizer
        elif isinstance(param, ParameterInfo):
            # TODO:
            # Currently, LSTM and RNNcells, which use ModelHelper instead of
            # LayerModelHelper as super class, are called in pooling_methods
            # In ModelHelper, regularization is not supported in create_param
            # We will unify the way of create_param of ModelHelper and
            # LayerModelHelper in the future.
            logger.info('regularization is unsupported for ParameterInfo object')
        else:
            raise ValueError(
                'unknown object type besides ParameterInfo and LayerParameter: {}'
                .format(param)
            )

    # The primary value of adding everything to self.net - generation of the
    # operators right away, i.e. if error happens it'll be detected
    # immediately. Other than this - create_x_net should be called.
    layer.add_operators(self.net, self.param_init_net)
    return layer.output_schema
|
|
|
|
def get_parameter_blobs(self):
|
|
param_blobs = []
|
|
for layer in self._layers:
|
|
for param in layer.get_parameters():
|
|
param_blobs.append(param.parameter)
|
|
|
|
return param_blobs
|
|
|
|
def add_post_grad_net_modifiers(self, modifier):
    """Append ``modifier`` to the post-grad net modifiers.

    Asserts that it is a NetModifier that has not been added before.
    """
    registered = self._post_grad_net_modifiers
    assert modifier not in registered,\
        "{0} is already in {1}".format(modifier, registered)
    assert isinstance(modifier, NetModifier),\
        "{} has to be a NetModifier instance".format(modifier)
    registered.append(modifier)
|
|
|
|
def add_final_net_modifiers(self, modifier):
    """Append ``modifier`` to the final net modifiers.

    Asserts that it is a NetModifier that has not been added before.
    """
    registered = self._final_net_modifiers
    assert modifier not in registered,\
        "{0} is already in {1}".format(modifier, registered)
    assert isinstance(modifier, NetModifier),\
        "{} has to be a NetModifier instance".format(modifier)
    registered.append(modifier)
|
|
|
|
@property
|
|
def seed(self):
|
|
return self._seed
|
|
|
|
@property
|
|
def sequence_seed(self):
|
|
return self._sequence_seed
|
|
|
|
def store_seed(self, seed, sequence_seed=True):
|
|
# Store seed config that will be applied to each op in the net.
|
|
self._seed = seed
|
|
# If sequence_seed is True, the i-th op has rand_seed=`seed + i`
|
|
self._sequence_seed = sequence_seed
|
|
|
|
def apply_seed(self, net):
|
|
if self._seed:
|
|
net.set_rand_seed(self._seed, self._sequence_seed)
|
|
|
|
@property
|
|
def default_optimizer(self):
|
|
return self._default_optimizer
|
|
|
|
@default_optimizer.setter
|
|
def default_optimizer(self, optimizer):
|
|
self._default_optimizer = optimizer
|
|
|
|
@property
|
|
def input_feature_schema(self):
|
|
return self._input_feature_schema
|
|
|
|
@property
|
|
def trainer_extra_schema(self):
|
|
return self._trainer_extra_schema
|
|
|
|
@property
|
|
def metrics_schema(self):
|
|
"""
|
|
Returns the schema that represents model output that should be used for
|
|
metric reporting.
|
|
|
|
During the training/evaluation this schema will be appended to the
|
|
schema that represents model output.
|
|
"""
|
|
return self._metrics_schema
|
|
|
|
@property
|
|
def output_schema(self):
|
|
assert self._output_schema is not None
|
|
return self._output_schema
|
|
|
|
@output_schema.setter
|
|
def output_schema(self, schema):
|
|
assert self._output_schema is None
|
|
self._output_schema = schema
|
|
|
|
@property
|
|
def preproc_output_schema(self):
|
|
assert self._preproc_output_schema is not None
|
|
return self._preproc_output_schema
|
|
|
|
@preproc_output_schema.setter
|
|
def preproc_output_schema(self, schema):
|
|
assert self._preproc_output_schema is None
|
|
self._preproc_output_schema = schema
|
|
|
|
@property
|
|
def prediction(self):
|
|
assert self._prediction, "model prediction is empty"
|
|
return self._prediction
|
|
|
|
def add_prediction(self, prediction, weight=1.0):
|
|
assert prediction is not None, "Added prediction should not be None"
|
|
self._prediction.append((prediction, weight))
|
|
|
|
@property
|
|
def loss(self):
|
|
assert self._loss is not None
|
|
return self._loss
|
|
|
|
@loss.setter
|
|
def loss(self, loss):
|
|
assert self._loss is None
|
|
self._loss = loss
|
|
|
|
def has_loss(self):
|
|
return self._loss is not None
|
|
|
|
def add_loss(self, loss, name='unnamed'):
    """Add ``loss`` to the model loss struct under ``name``.

    When ``name`` is already used in the struct, ``<name>_auto_<index>``
    is used instead, with the index counting up from 0 until the name is
    free. A loss previously set as a bare scalar is first wrapped into a
    struct under the name 'unnamed'.
    """
    assert loss is not None, "Added loss should not be None"
    assert isinstance(loss, (schema.Scalar, schema.Struct)), \
        "Added loss should be a scalar or a struct"

    if self._loss is None:
        self._loss = schema.Struct((name, loss))
        return

    # loss could've been set through model.loss directly which could be
    # a scalar
    if isinstance(self._loss, schema.Scalar):
        self._loss = schema.Struct(('unnamed', self._loss))

    field_name = name
    attempt = 0
    while field_name in self._loss:
        field_name = name + '_auto_' + str(attempt)
        attempt += 1

    self._loss = self._loss + schema.Struct((field_name, loss))
|
|
|
|
def add_output_schema(self, name, value):
    """Add the field ``(name, value)`` to the output schema.

    ``value`` has to be a schema.Scalar or schema.Struct, and ``name``
    must not be a field of the output schema yet.
    """
    assert value is not None, \
        'Added output schema {} should not be None'.format(name)
    assert isinstance(value, (schema.Scalar, schema.Struct)), (
        'Added output schema {} should be a scalar or a struct.\n'
        'Now it is {}.'.format(name, type(value)))

    new_field = schema.Struct((name, value))
    if self._output_schema is None:  # be the first field
        self._output_schema = new_field
        return

    # merge with other fields
    assert name not in self._output_schema.fields, \
        'Output Schema Field {} already exists'.format(name)
    self._output_schema = self._output_schema + new_field
|
|
|
|
def add_trainer_extra_schema(self, trainer_extra_schema):
    """Create a record for ``trainer_extra_schema`` in ``self.net`` and
    add it to the trainer extra schema."""
    self._trainer_extra_schema += schema.NewRecord(
        self.net, trainer_extra_schema)
|
|
|
|
def __getattr__(self, layer):
    """Resolve an unknown attribute name into a layer factory.

    Returns a function that creates the layer called ``layer`` with the
    given arguments, adds it to the model through add_layer and returns
    the result of add_layer. Names are resolved in this order:

      1. a registered layer (``layers.layer_exists``);
      2. an operator name, or ``FunctionalLayer<OpName>``, which is wrapped
         into a 'Functional' layer applying that operator.

    The keyword arguments ``output_to_metrics`` / ``params_to_metrics``,
    when truthy, additionally export the layer's output / parameters for
    metrics.

    Raises:
        AttributeError: for names starting with '__' and for names that
            match neither case above.
    """
    def is_functional_layer(layer):
        if core.IsOperator(layer):
            return True
        elif layer.startswith('FunctionalLayer'):
            return True
        else:
            return False

    def resolve_functional_layer(layer):
        # Strip the optional 'FunctionalLayer' prefix to get the op name.
        if core.IsOperator(layer):
            return layer
        elif layer.startswith('FunctionalLayer'):
            return layer[len('FunctionalLayer'):]
        else:
            raise ValueError(
                '%s cannot be resolved as functional layer' % layer
            )

    # Names starting with a double underscore are never treated as layers.
    if layer.startswith('__'):
        raise AttributeError(layer)

    # TODO(amalevich): Add support for ifbpy inline documentation
    if layers.layer_exists(layer):
        def wrapper(*args, **kwargs):
            new_layer = layers.create_layer(layer, self, *args, **kwargs)
            if kwargs.get("output_to_metrics", False):
                new_layer.export_output_for_metrics()
            if kwargs.get("params_to_metrics", False):
                new_layer.export_params_for_metrics()
            return self.add_layer(new_layer)
        return wrapper
    elif is_functional_layer(layer):
        # TODO(xlwang): Designated layer shadows the usage of an op as a
        # single layer. To enforce using an op (e.g. Split) as functional
        # layer, one can call 'model.FunctionalLayerSplit'
        # NOTE: `layer` is rebound here, so the closures below see the
        # resolved operator name rather than the requested attribute name.
        layer = resolve_functional_layer(layer)

        def wrapper(*args, **kwargs):
            def apply_operator(net, in_record, out_record, **kwargs):
                # TODO(amalevich): Switch to net.operator as soon as it gets
                # landed
                net.__getattr__(layer)(in_record.field_blobs(),
                                       out_record.field_blobs(),
                                       **kwargs)

            # The layer is named after the operator unless the caller
            # supplied a name.
            if 'name' not in kwargs:
                kwargs['name'] = layer

            new_layer = layers.create_layer(
                'Functional',
                self, *args, function=apply_operator,
                **kwargs
            )

            if kwargs.get("output_to_metrics", False):
                new_layer.export_output_for_metrics()
            if kwargs.get("params_to_metrics", False):
                new_layer.export_params_for_metrics()

            return self.add_layer(new_layer)
        return wrapper
    else:
        # this needs to be an AttributeError to fit hasattr semantics
        raise AttributeError(
            "Trying to create non-registered layer: {}".format(layer))
|
|
|
|
@property
|
|
def layers(self):
|
|
return self._layers
|
|
|
|
def apply_regularizers_on_loss(
    self,
    train_net,
    train_init_net,
    blob_to_device=None,
):
    """Call every recorded regularizer with RegularizationBy.ON_LOSS.

    Parameters recorded without a regularizer are skipped. Each blob a
    regularizer returns is added to the model loss under the blob's name.
    ``blob_to_device`` is accepted but not used here.
    """
    logger.info("apply regularizer on loss")
    for param, regularizer in viewitems(self.param_to_reg):
        if regularizer is None:
            continue
        logger.info("add regularizer {0} for param {1} to loss".format(regularizer, param))
        assert isinstance(regularizer, Regularizer)
        reg_loss = regularizer(
            train_net,
            train_init_net,
            param,
            grad=None,
            by=RegularizationBy.ON_LOSS,
        )
        logger.info(reg_loss)
        if reg_loss is None:
            continue
        self.add_loss(schema.Scalar(blob=reg_loss), str(reg_loss))
|
|
|
|
def apply_regularizers_after_optimizer(
    self,
    train_net,
    train_init_net,
    grad_map,
    blob_to_device=None,
):
    """Call every recorded regularizer with
    RegularizationBy.AFTER_OPTIMIZER.

    Parameters recorded without a regularizer are skipped. Each call runs
    inside the device scope that get_param_device returns for the
    parameter and its gradient (None when ``grad_map`` has no entry).
    """
    logger.info("apply regulizer after optimizer")
    cpu_device = muji.OnCPU()
    # if given, blob_to_device is a map from blob to device_option
    blob_to_device = blob_to_device or {}
    for param, regularizer in viewitems(self.param_to_reg):
        if regularizer is None:
            continue
        assert isinstance(regularizer, Regularizer)
        logger.info("add regularizer {0} for param {1} to optimizer".format(regularizer, param))
        grad = grad_map.get(str(param))
        device = get_param_device(
            param,
            grad,
            param_to_device=blob_to_device,
            default_device=cpu_device,
        )
        with core.DeviceScope(device):
            regularizer(
                train_net, train_init_net, param, grad=grad,
                by=RegularizationBy.AFTER_OPTIMIZER
            )
|
|
|
|
def apply_post_grad_net_modifiers(
|
|
self,
|
|
trainer_net,
|
|
trainer_init_net,
|
|
grad_map,
|
|
blob_to_device=None,
|
|
modify_output_record=False,
|
|
):
|
|
param_grad_map = {param: grad_map[param]
|
|
for param in self.param_to_optim.keys() if param in grad_map}
|
|
|
|
for modifier in self._post_grad_net_modifiers:
|
|
modifier(trainer_net, trainer_init_net, param_grad_map,
|
|
blob_to_device=blob_to_device,
|
|
modify_output_record=modify_output_record)
|
|
|
|
def apply_final_net_modifiers(
|
|
self,
|
|
trainer_net,
|
|
trainer_init_net,
|
|
grad_map,
|
|
blob_to_device=None,
|
|
modify_output_record=False,
|
|
):
|
|
for modifier in self._final_net_modifiers:
|
|
modifier(trainer_net, trainer_init_net, grad_map,
|
|
blob_to_device=blob_to_device,
|
|
modify_output_record=modify_output_record)
|
|
|
|
def apply_optimizers(
    self,
    train_net,
    train_init_net,
    grad_map,
    blob_to_device=None,
):
    """Call the recorded optimizer of every parameter.

    Each optimizer is called with the nets, the parameter and its gradient
    (None when ``grad_map`` has no entry), inside the device scope that
    get_param_device returns for the parameter.
    """
    cpu_device = muji.OnCPU()
    # if given, blob_to_device is a map from blob to device_option
    blob_to_device = blob_to_device or {}
    for param, optimizer in viewitems(self.param_to_optim):
        assert optimizer is not None, \
            "default optimizer must have been set in add_layer"
        # note that not all params has gradient and thus we sent None if
        # gradient does not exists
        grad = grad_map.get(str(param))
        device = get_param_device(
            param,
            grad,
            param_to_device=blob_to_device,
            default_device=cpu_device,
        )
        if device is not None:
            # extra info is not applicable for optimizers
            del device.extra_info[:]

        with core.DeviceScope(device):
            optimizer(train_net, train_init_net, param, grad)
|
|
|
|
def _GetOne(self):
|
|
return self.global_constants['ONE']
|
|
|
|
# An optimizer which allows us to do NO optimization
|
|
def NoOptim(self, *args, **kwargs):
|
|
pass
|
|
|
|
@property
|
|
def breakdown_map(self):
|
|
return self._breakdown_map
|
|
|
|
@breakdown_map.setter
|
|
def breakdown_map(self, breakdown_map):
|
|
# TODO(xlwang): provide more rich feature information in breakdown_map;
|
|
# and change the assertion accordingly
|
|
assert isinstance(breakdown_map, dict)
|
|
assert all(isinstance(k, six.string_types) for k in breakdown_map)
|
|
assert sorted(breakdown_map.values()) == list(range(len(breakdown_map)))
|
|
self._breakdown_map = breakdown_map
|