pytorch/caffe2/python/layers/semi_random_features.py
Jiyan Yang a8695178aa Adding parameter sharing API to Dper2
Summary:
To achieve this, I modified the blob naming scheme defined in a layer.
Before it was scope/fc_w and scope/fc_w_auto_0 (if there is another fc
    within the same scope).
Now I change it to scope/fc/w and scope/fc_auto_0/w.
That is, we rely on the uniqueness of the scoped layer name to define
names for blobs.

I also overrode the create_param method in LayerModelHelper to let it
use the resolved name for blobs given the parameter sharing context.
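For example (hypothetical scopes): if the sharing context maps scope_b to
scope_a, an fc layer created under scope_b resolves its weight blob to
scope_a/fc/w, so both scopes end up using the same parameter.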

There are some details such as making the initializer more structured
that I need to finalize.

Reviewed By: kennyhorror

Differential Revision: D5435132

fbshipit-source-id: a0525f5ea0977e255dd5ea765b38913f5951d455
2017-08-03 00:33:18 -07:00


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import schema
from caffe2.python.layers.arc_cosine_feature_map import ArcCosineFeatureMap

import numpy as np


class SemiRandomFeatures(ArcCosineFeatureMap):
    """
    Implementation of the semi-random kernel feature map.

    Applies H(x_rand) * x_rand^s * x_learned, where
        H is the Heaviside step function,
        x_rand is the input after applying FC with randomized parameters,
        and x_learned is the input after applying FC with learnable parameters.

    If using a multilayer model with semi-random layers, the input and output
    records should have a 'full' and a 'random' Scalar. The random Scalar will
    be passed as input to process the random features.

    For more information, see the original paper:
        https://arxiv.org/pdf/1702.08882.pdf

    Inputs:
        output_dims -- dimensions of the output vector
        s -- if s == 0, will obtain linear semi-random features;
             if s == 1, will obtain squared semi-random features;
             if s >= 2, will obtain higher-order semi-random features
        scale -- amount to scale the standard deviation
        weight_init -- initialization distribution for the weight parameter
        bias_init -- initialization distribution for the bias parameter
        weight_optim -- optimizer for the weight params of the learned features
        bias_optim -- optimizer for the bias param of the learned features
        set_weight_as_global_constant -- if True, the initialized random
                                         parameters will be constant across
                                         all distributed instances of the layer
    """
    def __init__(
            self,
            model,
            input_record,
            output_dims,
            s=0,
            scale=None,
            weight_init=None,
            bias_init=None,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=False,
            name='semi_random_features',
            **kwargs):
        if isinstance(input_record, schema.Struct):
            schema.is_schema_subset(
                schema.Struct(
                    ('full', schema.Scalar()),
                    ('random', schema.Scalar()),
                ),
                input_record
            )
            self.input_record_full = input_record.full
            self.input_record_random = input_record.random
        elif isinstance(input_record, schema.Scalar):
            self.input_record_full = input_record
            self.input_record_random = input_record

        # The random parameters stay fixed after initialization, so no
        # optimizers are passed to the base class for them.
        super(SemiRandomFeatures, self).__init__(
            model,
            self.input_record_full,
            output_dims,
            s=s,
            scale=scale,
            weight_init=weight_init,
            bias_init=bias_init,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=set_weight_as_global_constant,
            initialize_output_schema=False,
            name=name,
            **kwargs)

        self.output_schema = schema.Struct(
            ('full', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_full_output')
            ),),
            ('random', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_random_output')
            ),),
        )

        # Learned parameters for the trainable FC
        (self.learned_w, self.learned_b) = self._initialize_params(
            'learned_w',
            'learned_b',
            w_init=weight_init,
            b_init=bias_init,
            w_optim=weight_optim,
            b_optim=bias_optim
        )

    def add_ops(self, net):
        # Learned features: wx + b with trainable parameters
        learned_features = net.FC(self.input_record_full.field_blobs() +
                                  [self.learned_w, self.learned_b],
                                  net.NextScopedBlob('learned_features'))
        # Random features: wx + b with fixed random parameters
        random_features = net.FC(self.input_record_random.field_blobs() +
                                 [self.random_w, self.random_b],
                                 net.NextScopedBlob('random_features'))
        # H(x_rand) * x_rand^s, written into the 'random' output record
        processed_random_features = self._heaviside_with_power(
            net,
            random_features,
            self.output_schema.random.field_blobs(),
            self.s
        )
        # Full output: H(x_rand) * x_rand^s * x_learned
        net.Mul([processed_random_features, learned_features],
                self.output_schema.full.field_blobs())
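
# A minimal usage sketch (assumptions: `model` is a LayerModelHelper and
# `input_record` is a schema.Scalar of float features; the names and
# dimensions are illustrative, not part of this module):
#
#     srf = SemiRandomFeatures(model, input_record, output_dims=32, s=1)
#     # srf.output_schema.full   -- H(x_rand) * x_rand^s * x_learned
#     # srf.output_schema.random -- H(x_rand) * x_rand^s; when stacking
#     # semi-random layers, pass the whole full/random Struct to the next
#     # layer so it can process the 'random' Scalar separately.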