## @package fc_with_bootstrap
# Module caffe2.python.layers.fc_with_bootstrap
import math

import numpy as np

from caffe2.python import core, schema
from caffe2.python.helpers.arg_scope import get_current_scope
from caffe2.python.layers.layers import ModelLayer
from caffe2.python.layers.sampling_trainable_mixin import SamplingTrainableMixin


def get_fc_predictor_version(fc_version):
    assert fc_version in ["fp32"], (
        "Only support fp32 for the fully connected layer "
        "in the predictor net, the provided FC precision is {}".format(fc_version)
    )
    return fc_version


class FCWithBootstrap(SamplingTrainableMixin, ModelLayer):
    """
    A fully-connected layer that trains `num_bootstrap` independent FCs, each on
    a bootstrapped (sampled-with-replacement) slice of the incoming batch, and
    exposes one (indices, preds) pair per bootstrap iteration in its output
    schema. An illustrative usage sketch appears in a comment at the bottom of
    this file.
    """

    def __init__(
        self,
        model,
        input_record,
        output_dims,
        num_bootstrap,
        weight_init=None,
        bias_init=None,
        weight_optim=None,
        bias_optim=None,
        name="fc_with_bootstrap",
        weight_reg=None,
        bias_reg=None,
        clip_param=None,
        axis=1,
        **kwargs
    ):
        super(FCWithBootstrap, self).__init__(model, name, input_record, **kwargs)
        assert isinstance(
            input_record, schema.Scalar
        ), "Incorrect input type {}".format(input_record)
        assert (
            len(input_record.field_types()[0].shape) > 0
        ), "FC expects an input tensor with at least one dimension"
        assert axis >= 1, "axis {} should be >= 1.".format(axis)
        self.axis = axis
        input_dims = np.prod(input_record.field_types()[0].shape[axis - 1 :])
        assert input_dims > 0, "FC expects input dimensions > 0, got {}".format(
            input_dims
        )

        self.clip_args = None

        # attributes for bootstrapping below
        self.num_bootstrap = num_bootstrap

        # input dim shape
        self.input_dims = input_dims

        # bootstrapped fully-connected layers to be used at eval time
        self.bootstrapped_FCs = []

        # scalar containing the batch_size blob so that we don't need to recompute it
        self.batch_size = None

        # we want this to be the last FC, so the output_dim should be 1; set to None
        self.output_dim_vec = None

        # lower bound when creating random indices
        self.lower_bound = None

        # upper bound when creating random indices
        self.upper_bound = None

        if clip_param is not None:
            assert len(clip_param) == 2, (
                "clip_param must be a tuple / list "
                "of length 2 and in the form of (clip_min, clip_max)"
            )
            clip_min, clip_max = clip_param
            assert (
                clip_min is not None or clip_max is not None
            ), "clip_min and clip_max in clip_param cannot both be None"
            assert (
                clip_min is None or clip_max is None
            ) or clip_min < clip_max, (
                "clip_param = [clip_min, clip_max] must have clip_min < clip_max"
            )
            self.clip_args = {}
            if clip_min is not None:
                self.clip_args["min"] = clip_min
            if clip_max is not None:
                self.clip_args["max"] = clip_max

        scale = math.sqrt(1.0 / input_dims)
        weight_init = (
            weight_init
            if weight_init
            else ("UniformFill", {"min": -scale, "max": scale})
        )
        bias_init = (
            bias_init if bias_init else ("UniformFill", {"min": -scale, "max": scale})
        )

        """
        bootstrapped FCs:
            Ex: [
                    bootstrapped_weights_blob_1, bootstrapped_bias_blob_1,
                    ...,
                    ...,
                    bootstrapped_weights_blob_b, bootstrapped_bias_blob_b
                ]

        output_schema:
            Note: indices will always be on even indices.
            Ex: Struct(
                    indices_0_blob,
                    preds_0_blob,
                    ...
                    ...
                    indices_b_blob,
                    preds_b_blob
                )
        """
        bootstrapped_FCs = []

        output_schema = schema.Struct()
        for i in range(num_bootstrap):
            output_schema += schema.Struct(
                (
                    "bootstrap_iteration_{}/indices".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/indices".format(i)
                    ),
                ),
                (
                    "bootstrap_iteration_{}/preds".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/preds".format(i)
                    ),
                ),
            )

            self.bootstrapped_FCs.extend(
                [
                    self.create_param(
                        param_name="bootstrap_iteration_{}/w".format(i),
                        shape=[output_dims, input_dims],
                        initializer=weight_init,
                        optimizer=weight_optim,
                        regularizer=weight_reg,
                    ),
                    self.create_param(
                        param_name="bootstrap_iteration_{}/b".format(i),
                        shape=[output_dims],
                        initializer=bias_init,
                        optimizer=bias_optim,
                        regularizer=bias_reg,
                    ),
                ]
            )

        self.output_schema = output_schema

        if axis == 1:
            output_shape = (output_dims,)
        else:
            output_shape = list(input_record.field_types()[0].shape)[0 : axis - 1]
            output_shape = tuple(output_shape + [output_dims])

    def _generate_bootstrapped_indices(self, net, copied_cur_layer, iteration):
        """
        Generate one random index per example in the batch, drawn uniformly
        with replacement from [0, batch_size - 1]. (A NumPy sketch of this
        resampling appears in a comment at the bottom of this file.)

        Args:
            net: the caffe2 net to insert the operator into

            copied_cur_layer: blob of the bootstrapped features (make sure this
            blob has a stop_gradient on it)

            iteration: the bootstrap iteration to generate indices for. Used to
            correctly populate the output_schema

        Return:
            A blob containing the generated indices of shape: (batch_size,)
        """
        with core.NameScope("bootstrap_iteration_{}".format(iteration)):
            if iteration == 0:
                # capture batch_size once for efficiency
                input_shape = net.Shape(copied_cur_layer, "input_shape")
                batch_size_index = net.Const(np.array([0]), "batch_size_index")
                batch_size = net.Gather([input_shape, batch_size_index], "batch_size")
                self.batch_size = batch_size

                lower_bound = net.Const(np.array([0]), "lower_bound", dtype=np.int32)
                offset = net.Const(np.array([1]), "offset", dtype=np.int32)
                int_batch_size = net.Cast(
                    [self.batch_size], "int_batch_size", to=core.DataType.INT32
                )
                upper_bound = net.Sub([int_batch_size, offset], "upper_bound")

                self.lower_bound = lower_bound
                self.upper_bound = upper_bound

            indices = net.UniformIntFill(
                [self.batch_size, self.lower_bound, self.upper_bound],
                self.output_schema[iteration * 2].field_blobs()[0],
                input_as_shape=1,
            )

            return indices

    def _bootstrap_ops(self, net, copied_cur_layer, indices, iteration):
        """
        This method contains all the bootstrapping logic used to bootstrap the
        features. Only used by the train_net.

        Args:
            net: the caffe2 net to insert the bootstrapping operators into

            copied_cur_layer: the blob representing the current features.
            Note, this layer should have a stop_gradient on it.

        Returns:
            bootstrapped_features: blob of the bootstrapped version of cur_layer
            with the same dimensions
        """
        # draw features based upon the bootstrapped indices
        bootstrapped_features = net.Gather(
            [copied_cur_layer, indices],
            net.NextScopedBlob("bootstrapped_features_{}".format(iteration)),
        )

        bootstrapped_features = schema.Scalar(
            (np.float32, self.input_dims), bootstrapped_features
        )

        return bootstrapped_features

    def _insert_fc_ops(self, net, features, params, outputs, version):
        """
        Args:
            net: the caffe2 net to insert the operator into

            features: Scalar containing the blob of the bootstrapped features or
            the actual cur_layer features

            params: weight and bias for the FC

            outputs: the output blobs

            version: only fp32 is supported for now.
""" if version == "fp32": pred_blob = net.FC( features.field_blobs() + params, outputs, axis=self.axis, **self.kwargs ) return pred_blob else: raise Exception("unsupported FC type version {}".format(version)) def _add_ops(self, net, features, iteration, params, version): """ Args: params: the weight and bias, passed by either add_ops or add_train_ops function features: feature blobs to predict on. Can be the actual cur_layer or the bootstrapped_feature blobs. version: currently fp32 support only """ if self.clip_args is not None: clipped_params = [net.NextScopedBlob("clipped_%s" % str(p)) for p in params] for p, cp in zip(params, clipped_params): net.Clip([p], [cp], **self.clip_args) params = clipped_params if self.output_dim_vec is None or len(self.output_dim_vec) == 1: self._insert_fc_ops( net=net, features=features, params=params, outputs=[self.output_schema.field_blobs()[(iteration * 2) + 1]], version=version, ) def add_ops(self, net): """ Both the predict net and the eval net will call this function. For bootstrapping approach, the goal is to pass the cur_layer feature inputs through all the bootstrapped FCs that are stored under self.bootstrapped_FCs. Return the preds in the same output_schema with dummy indices (because they are not needed). """ version_info = get_current_scope().get( get_fc_predictor_version.__name__, {"fc_version": "fp32"} ) predictor_fc_fp_version = version_info["fc_version"] for i in range(self.num_bootstrap): # these are dummy indices, not to be used anywhere indices = self._generate_bootstrapped_indices( net=net, copied_cur_layer=self.input_record.field_blobs()[0], iteration=i, ) params = self.bootstrapped_FCs[i * 2 : (i * 2) + 2] self._add_ops( net=net, features=self.input_record, params=params, iteration=i, version=predictor_fc_fp_version, ) def add_train_ops(self, net): # use the train_param_blobs to be consistent with the SamplingTrain unittest # obtain features for i in range(self.num_bootstrap): indices = self._generate_bootstrapped_indices( net=net, copied_cur_layer=self.input_record.field_blobs()[0], iteration=i, ) bootstrapped_features = self._bootstrap_ops( net=net, copied_cur_layer=self.input_record.field_blobs()[0], indices=indices, iteration=i, ) self._add_ops( net, features=bootstrapped_features, iteration=i, params=self.train_param_blobs[i * 2 : (i * 2) + 2], version="fp32", ) def get_fp16_compatible_parameters(self): if self.output_dim_vec is None or len(self.output_dim_vec) == 1: return [ blob for idx, blob in enumerate(self.bootstrapped_FCs) if idx % 2 == 0 ] else: raise Exception( "Currently only supports functionality for output_dim_vec == 1" ) @property def param_blobs(self): if self.output_dim_vec is None or len(self.output_dim_vec) == 1: return self.bootstrapped_FCs else: raise Exception("FCWithBootstrap layer only supports output_dim_vec==1")