from caffe2.python import schema
from caffe2.python.layers.arc_cosine_feature_map import ArcCosineFeatureMap
import numpy as np


class SemiRandomFeatures(ArcCosineFeatureMap):
    """
    Implementation of the semi-random kernel feature map.

    Applies H(x_rand) * x_rand^s * x_learned, where
        H is the Heaviside step function,
        x_rand is the input after applying FC with randomized parameters,
        and x_learned is the input after applying FC with learnable parameters.

    If using a multilayer model with semi-random layers, the input and output
    records should have a 'full' and a 'random' Scalar. The random Scalar will
    be passed as input to process the random features.

    For more information, see the original paper:
        https://arxiv.org/pdf/1702.08882.pdf

    Inputs:
        output_dims -- dimensions of the output vector
        s -- if s == 0, will obtain linear semi-random features;
             else if s == 1, will obtain squared semi-random features;
             else if s >= 2, will obtain higher order semi-random features
        scale_random -- amount to scale the standard deviation
            (for random parameter initialization when weight_init or
            bias_init hasn't been specified)
        scale_learned -- amount to scale the standard deviation
            (for learned parameter initialization when weight_init or
            bias_init hasn't been specified)
        weight_init_random -- initialization distribution for the random
            weight parameter (if None, will use Gaussian distribution)
        bias_init_random -- initialization distribution for the random
            bias parameter (if None, will use Uniform distribution)
        weight_init_learned -- initialization distribution for the learned
            weight parameter (if None, will use Gaussian distribution)
        bias_init_learned -- initialization distribution for the learned
            bias parameter (if None, will use Uniform distribution)
        weight_optim -- optimizer for the weight params of the learned features
        bias_optim -- optimizer for the bias param of the learned features
        set_weight_as_global_constant -- if True, initialized random parameters
            will be constant across all distributed instances of the layer
    """
    def __init__(
            self,
            model,
            input_record,
            output_dims,
            s=1,
            scale_random=1.0,
            scale_learned=1.0,
            weight_init_random=None,
            bias_init_random=None,
            weight_init_learned=None,
            bias_init_learned=None,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=False,
            name='semi_random_features',
            **kwargs):

        if isinstance(input_record, schema.Struct):
            schema.is_schema_subset(
                schema.Struct(
                    ('full', schema.Scalar()),
                    ('random', schema.Scalar()),
                ),
                input_record
            )
            self.input_record_full = input_record.full
            self.input_record_random = input_record.random
        elif isinstance(input_record, schema.Scalar):
            # A plain Scalar input feeds both the random and the learned paths.
            self.input_record_full = input_record
            self.input_record_random = input_record

        super().__init__(
            model,
            self.input_record_full,
            output_dims,
            s=s,
            scale=scale_random,  # To initialize the random parameters
            weight_init=weight_init_random,
            bias_init=bias_init_random,
            weight_optim=None,  # Random parameters stay fixed, so no optimizers
            bias_optim=None,
            set_weight_as_global_constant=set_weight_as_global_constant,
            initialize_output_schema=False,
            name=name,
            **kwargs)

        self.output_schema = schema.Struct(
            ('full', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_full_output')
            ),),
            ('random', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_random_output')
            ),),
        )

        # To initialize the learnable parameters
        assert (scale_learned > 0.0), \
            "Expected scale (learned) > 0, got %s" % scale_learned
        self.stddev = scale_learned * np.sqrt(1.0 / self.input_dims)

        # Learned Parameters
        (self.learned_w, self.learned_b) = self._initialize_params(
            'learned_w',
            'learned_b',
            w_init=weight_init_learned,
            b_init=bias_init_learned,
            w_optim=weight_optim,
            b_optim=bias_optim
        )

    def add_ops(self, net):
        # Learned features: wx + b
        learned_features = net.FC(
            self.input_record_full.field_blobs() +
            [self.learned_w, self.learned_b],
            net.NextScopedBlob('learned_features')
        )
        # Random features: wx + b
        random_features = net.FC(
            self.input_record_random.field_blobs() +
            [self.random_w, self.random_b],
            net.NextScopedBlob('random_features')
        )
        # H(x_rand) * x_rand^s, written into the 'random' output Scalar
        processed_random_features = self._heaviside_with_power(
            net,
            random_features,
            self.output_schema.random.field_blobs(),
            self.s
        )
        # Full output: H(x_rand) * x_rand^s * x_learned
        net.Mul([processed_random_features, learned_features],
                self.output_schema.full.field_blobs())
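

# Example usage (a minimal sketch, assuming a caffe2 LayerModelHelper `model`
# whose input_feature_schema already exposes a float feature Scalar; the field
# name `float_features` and the dimensions below are illustrative only):
#
#     semi_random = model.SemiRandomFeatures(
#         model.input_feature_schema.float_features,
#         output_dims=64,
#         s=1,                   # squared semi-random features
#         scale_random=1.0,
#         scale_learned=1.0,
#     )
#
#     # The output record is a Struct with 'full' and 'random' Scalars, so a
#     # second semi-random layer can consume it directly to stack layers:
#     stacked = model.SemiRandomFeatures(semi_random, output_dims=32, s=1)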