from caffe2.python import core
from caffe2.proto import caffe2_pb2
import logging


class CNNModelHelper(object):
    """A helper model so we can write CNN models more easily, without having
    to manually define parameter initializations and operators separately.

    Two nets are maintained: `param_init_net`, run once to fill parameter
    blobs, and `net`, the actual computation net.  Every parameter blob
    created through the helper methods is recorded in `params` (and, where
    applicable, `weights` / `biases`) so gradients can be wired up later via
    `AddGradientOperators`.
    """

    def __init__(self, order="NCHW", name=None, use_cudnn=True,
                 cudnn_exhaustive_search=False, ws_nbytes_limit=None):
        """
        Args:
            order: image tensor storage order, "NCHW" or "NHWC".
            name: base name for the nets; defaults to "CNN".
            use_cudnn: if True, tag supported ops with engine='CUDNN'.
            cudnn_exhaustive_search: forwarded to cuDNN conv ops.
            ws_nbytes_limit: optional cuDNN workspace size limit in bytes.

        Raises:
            ValueError: if `order` is neither "NCHW" nor "NHWC".
        """
        if name is None:
            name = "CNN"
        self.net = core.Net(name)
        self.param_init_net = core.Net(name + '_init')
        self.params = []
        self.param_to_grad = {}
        self.weights = []
        self.biases = []
        self.order = order
        self.use_cudnn = use_cudnn
        self.cudnn_exhaustive_search = cudnn_exhaustive_search
        self.ws_nbytes_limit = ws_nbytes_limit
        if self.order != "NHWC" and self.order != "NCHW":
            raise ValueError(
                "Cannot understand the CNN storage order %s." % self.order
            )

    def Proto(self):
        """Return the protobuf of the computation net."""
        return self.net.Proto()

    def CreateDB(self, blob_out, db, db_type, **kwargs):
        """Create a DB reader blob in the param-init net."""
        dbreader = self.param_init_net.CreateDB(
            [], blob_out, db=db, db_type=db_type, **kwargs)
        return dbreader

    def ImageInput(self, blob_in, blob_out, **kwargs):
        """Image Input.

        The ImageInput op emits NHWC data; for NCHW storage order an explicit
        NHWC2NCHW transpose is inserted after it, writing to blob_out[0].
        """
        if self.order == "NCHW":
            data, label = self.net.ImageInput(
                blob_in, [blob_out[0] + '_nhwc', blob_out[1]], **kwargs)
            data = self.net.NHWC2NCHW(data, blob_out[0])
        else:
            data, label = self.net.ImageInput(
                blob_in, blob_out, **kwargs)
        return data, label

    def TensorProtosDBInput(
        self, unused_blob_in, blob_out, batch_size, db, db_type, **kwargs
    ):
        """TensorProtosDBInput.

        Creates the DB reader in the init net (so it is opened once) and the
        actual input op in the computation net.
        """
        dbreader_name = "dbreader_" + db
        dbreader = self.param_init_net.CreateDB(
            [], dbreader_name, db=db, db_type=db_type)
        return self.net.TensorProtosDBInput(
            dbreader, blob_out, batch_size=batch_size)

    def Conv(self, blob_in, blob_out, dim_in, dim_out, kernel,
             weight_init=None, bias_init=None, **kwargs):
        """Convolution. We intentionally do not provide odd kernel/stride/pad
        settings in order to discourage the use of odd cases.

        Args:
            blob_in: input blob.
            blob_out: output blob name; auto-generated when falsy.
            dim_in / dim_out: input / output channel counts.
            kernel: square kernel size.
            weight_init / bias_init: (op_name, kwargs) pairs for the
                initializer ops; default XavierFill / ConstantFill.
        """
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        blob_out = blob_out or self.net.NextName()
        weight_shape = (
            [dim_out, dim_in, kernel, kernel]
            if self.order == "NCHW"
            else [dim_out, kernel, kernel, dim_in]
        )
        weight = self.param_init_net.__getattr__(weight_init[0])(
            [],
            blob_out + '_w',
            shape=weight_shape,
            **weight_init[1]
        )
        bias = self.param_init_net.__getattr__(bias_init[0])(
            [],
            blob_out + '_b',
            shape=[dim_out, ],
            **bias_init[1]
        )
        self.params.extend([weight, bias])
        self.weights.append(weight)
        self.biases.append(bias)
        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit
        return self.net.Conv(
            [blob_in, weight, bias],
            blob_out,
            kernel=kernel,
            order=self.order,
            **kwargs
        )

    def GroupConv(self, blob_in, blob_out, dim_in, dim_out, kernel,
                  weight_init=None, bias_init=None, group=1, **kwargs):
        """Grouped convolution. We intentionally do not provide odd
        kernel/stride/pad settings in order to discourage the use of odd
        cases.

        The input is depth-split into `group` equal slices, each convolved
        independently, and the results are depth-concatenated.

        FIX: shape arithmetic now uses floor division (//) so the generated
        shapes stay integers under Python 3, where `/` is true division.
        `weight_init` / `bias_init` also gained the same defaults as Conv/FC
        (backward-compatible: previously they were required positionally).

        NOTE(review): `dim_out` is assumed divisible by `group`; only
        `dim_in` is validated, matching the original behavior.
        """
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit
        if dim_in % group:
            raise ValueError("dim_in should be divisible by group.")
        splitted_blobs = self.net.DepthSplit(
            blob_in,
            ['_' + blob_out + '_gconv_split_' + str(i) for i in range(group)],
            dimensions=[dim_in // group for i in range(group)],
            order=self.order
        )
        weight_shape = (
            [dim_out // group, dim_in // group, kernel, kernel]
            if self.order == "NCHW"
            else [dim_out // group, kernel, kernel, dim_in // group]
        )
        conv_blobs = []
        for i in range(group):
            weight = self.param_init_net.__getattr__(weight_init[0])(
                [],
                blob_out + '_gconv_%d_w' % i,
                shape=weight_shape,
                **weight_init[1]
            )
            bias = self.param_init_net.__getattr__(bias_init[0])(
                [],
                blob_out + '_gconv_%d_b' % i,
                shape=[dim_out // group],
                **bias_init[1]
            )
            self.params.extend([weight, bias])
            self.weights.append(weight)
            self.biases.append(bias)
            conv_blobs.append(
                splitted_blobs[i].Conv(
                    [weight, bias],
                    blob_out + '_gconv_%d' % i,
                    kernel=kernel,
                    order=self.order,
                    **kwargs
                )
            )
        concat, concat_dims = self.net.DepthConcat(
            conv_blobs,
            [blob_out, "_" + blob_out + "_concat_dims"],
            order=self.order
        )
        return concat

    def FC(self, blob_in, blob_out, dim_in, dim_out,
           weight_init=None, bias_init=None, **kwargs):
        """FC (fully connected): y = x * W^T + b.

        Weight shape is [dim_out, dim_in]; default initializers are
        XavierFill for the weight and ConstantFill (zeros) for the bias.
        """
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        blob_out = blob_out or self.net.NextName()
        weight = self.param_init_net.__getattr__(weight_init[0])(
            [],
            blob_out + '_w',
            shape=[dim_out, dim_in],
            **weight_init[1]
        )
        bias = self.param_init_net.__getattr__(bias_init[0])(
            [],
            blob_out + '_b',
            shape=[dim_out, ],
            **bias_init[1]
        )
        self.params.extend([weight, bias])
        return self.net.FC([blob_in, weight, bias], blob_out, **kwargs)

    def LRN(self, blob_in, blob_out, **kwargs):
        """LRN (local response normalization).

        The op also produces a scale blob; only the normalized output is
        returned.
        """
        return self.net.LRN(
            blob_in,
            [blob_out, "_" + blob_out + "_scale"],
            order=self.order,
            **kwargs
        )[0]

    def Dropout(self, blob_in, blob_out, **kwargs):
        """Dropout.  The mask output is discarded; only data is returned."""
        return self.net.Dropout(
            blob_in, [blob_out, "_" + blob_out + "_mask"], **kwargs
        )[0]

    def MaxPool(self, blob_in, blob_out, **kwargs):
        """Max pooling."""
        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
        return self.net.MaxPool(
            blob_in, blob_out, order=self.order, **kwargs)

    def AveragePool(self, blob_in, blob_out, **kwargs):
        """Average pooling."""
        if self.use_cudnn:
            kwargs['engine'] = 'CUDN' + 'N'  # see NOTE below
        return self.net.AveragePool(
            blob_in,
            blob_out,
            order=self.order,
            **kwargs
        )

    def DepthConcat(self, blobs_in, blob_out, **kwargs):
        """Depth Concat."""
        # NOTE(review): "_condat_dims" is a long-standing typo for
        # "_concat_dims"; kept byte-identical because it is a runtime blob
        # name that existing workspaces/checkpoints may reference.
        return self.net.DepthConcat(
            blobs_in,
            [blob_out, "_" + blob_out + "_condat_dims"],
            order=self.order
        )[0]

    def Relu(self, blob_in, blob_out, **kwargs):
        """Relu."""
        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
        return self.net.Relu(blob_in, blob_out, order=self.order, **kwargs)

    def SpatialBN(self, blob_in, blob_out, dim_in, **kwargs):
        """Spatial batch normalization.

        Creates and tracks scale (ones) and bias (zeros) parameters of size
        `dim_in`, and returns only the normalized output blob; the running
        and saved mean/inv-var side outputs are named but not returned.
        """
        blob_out = blob_out or self.net.NextName()
        # Input: input, scale, bias, est_mean, est_inv_var
        # Output: output, running_mean, running_inv_var, saved_mean,
        #         saved_inv_var
        # scale: initialize with ones
        # bias: initialize with zeros

        def init_blob(value, suffix):
            # Constant-fill a parameter blob of shape [dim_in].
            return self.param_init_net.ConstantFill(
                [], blob_out + "_" + suffix, shape=[dim_in], value=value)

        scale, bias = init_blob(1.0, "s"), init_blob(0.0, "b")
        self.params.extend([scale, bias])
        self.weights.append(scale)
        self.biases.append(bias)
        blob_outs = [blob_out, blob_out + "_rm", blob_out + "_riv",
                     blob_out + "_sm", blob_out + "_siv"]
        blob_outputs = self.net.SpatialBN(
            [blob_in, scale, bias], blob_outs, order=self.order, **kwargs)
        # Return the output
        return blob_outputs[0]

    @property
    def XavierInit(self):
        """Convenience (op, kwargs) pair for Xavier initialization."""
        return ('XavierFill', {})

    @property
    def MSRAInit(self):
        """Convenience (op, kwargs) pair for MSRA initialization."""
        return ('MSRAFill', {})

    @property
    def ZeroInit(self):
        """Convenience (op, kwargs) pair for zero initialization."""
        return ('ConstantFill', {})

    def AddGradientOperators(self, **kwargs):
        """Add gradient ops to the net and record the param->grad mapping
        for every tracked parameter that received a gradient."""
        grad_map = self.net.AddGradientOperators(**kwargs)
        for p in self.params:
            if str(p) in grad_map:
                self.param_to_grad[p] = grad_map[str(p)]
        return grad_map

    @property
    def CPU(self):
        """DeviceOption for the CPU."""
        device_option = caffe2_pb2.DeviceOption()
        device_option.device_type = caffe2_pb2.CPU
        return device_option

    @property
    def GPU(self):
        """DeviceOption for GPU 0.

        FIX: the original declared `gpu_id=0` as a parameter, but a
        @property can never be called with arguments, so the parameter was
        unreachable dead code; behavior is unchanged (always GPU 0).
        """
        device_option = caffe2_pb2.DeviceOption()
        device_option.device_type = caffe2_pb2.CUDA
        device_option.cuda_gpu_id = 0
        return device_option

    def __getattr__(self, op_type):
        """Catch-all for all other operators, mostly those without params."""
        if not core.IsOperator(op_type):
            raise RuntimeError(
                'Method ' + op_type + ' is not a registered operator.'
            )
        logging.warning("You are creating an op that the CNNModelHelper "
                        "does not recognize: {}.".format(op_type))
        return self.net.__getattr__(op_type)