## @package cnn
# Module caffe2.python.cnn
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, scope, model_helpers
from caffe2.python.model_helper import ModelHelperBase
from caffe2.proto import caffe2_pb2


class CNNModelHelper(ModelHelperBase):
    """A helper model so we can write CNN models more easily, without having
    to manually define parameter initializations and operators separately.
    """

    def __init__(self, order="NCHW", name=None,
                 use_cudnn=True, cudnn_exhaustive_search=False,
                 ws_nbytes_limit=None, init_params=True,
                 skip_sparse_optim=False,
                 param_model=None):
        super(CNNModelHelper, self).__init__(
            skip_sparse_optim=skip_sparse_optim,
            name="CNN" if name is None else name,
            init_params=init_params,
            param_model=param_model,
        )

        self.order = order
        self.use_cudnn = use_cudnn
        self.cudnn_exhaustive_search = cudnn_exhaustive_search
        self.ws_nbytes_limit = ws_nbytes_limit
        if self.order != "NHWC" and self.order != "NCHW":
            raise ValueError(
                "Cannot understand the CNN storage order %s." % self.order
            )

    def GetWeights(self, namescope=None):
        if namescope is None:
            namescope = scope.CurrentNameScope()

        if namescope == '':
            return self.weights[:]
        else:
            return [w for w in self.weights if w.GetNameScope() == namescope]

    def GetBiases(self, namescope=None):
        if namescope is None:
            namescope = scope.CurrentNameScope()

        if namescope == '':
            return self.biases[:]
        else:
            return [b for b in self.biases if b.GetNameScope() == namescope]

    def ImageInput(
            self, blob_in, blob_out, use_gpu_transform=False, **kwargs
    ):
        """Image Input."""
        if self.order == "NCHW":
            if (use_gpu_transform):
                kwargs['use_gpu_transform'] = 1 if use_gpu_transform else 0
                # GPU transform will handle NHWC -> NCHW
                data, label = self.net.ImageInput(
                    blob_in, [blob_out[0], blob_out[1]], **kwargs)
                # data = self.net.Transform(data, blob_out[0], **kwargs)
                pass
            else:
                data, label = self.net.ImageInput(
                    blob_in, [blob_out[0] + '_nhwc', blob_out[1]], **kwargs)
                data = self.net.NHWC2NCHW(data, blob_out[0])
        else:
            data, label = self.net.ImageInput(
                blob_in, blob_out, **kwargs)
        return data, label
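    # Minimal usage sketch for ImageInput (illustrative only, not part of the
    # original module): the reader blob, DB, and batch size are hypothetical
    # and depend on how the input database was created.
    #
    #   model = CNNModelHelper(order="NCHW", name="example")
    #   # 'reader' is assumed to be a DBReader blob created beforehand,
    #   # e.g. by a CreateDB operator added to model.param_init_net.
    #   data, label = model.ImageInput(
    #       [reader], ["data", "label"], batch_size=64)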
    def _ConvBase(  # noqa
        self, is_nd, blob_in, blob_out, dim_in, dim_out, kernel,
        weight_init=None, bias_init=None, group=1, transform_inputs=None,
        **kwargs
    ):
        kernels = []
        if is_nd:
            if not isinstance(kernel, list):
                kernels = [kernel]
            else:
                kernels = kernel
        else:
            kernels = [kernel] * 2

        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit

        use_bias = \
            False if ("no_bias" in kwargs and kwargs["no_bias"]) else True
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        blob_out = blob_out or self.net.NextName()
        weight_shape = [dim_out]
        if self.order == "NCHW":
            weight_shape.append(int(dim_in / group))
            weight_shape.extend(kernels)
        else:
            weight_shape.extend(kernels)
            weight_shape.append(int(dim_in / group))

        if self.init_params:
            weight = self.param_init_net.__getattr__(weight_init[0])(
                [],
                blob_out + '_w',
                shape=weight_shape,
                **weight_init[1]
            )
            if use_bias:
                bias = self.param_init_net.__getattr__(bias_init[0])(
                    [],
                    blob_out + '_b',
                    shape=[dim_out, ],
                    **bias_init[1]
                )
        else:
            weight = core.ScopedBlobReference(
                blob_out + '_w', self.param_init_net)
            if use_bias:
                bias = core.ScopedBlobReference(
                    blob_out + '_b', self.param_init_net)

        if use_bias:
            self.params.extend([weight, bias])
        else:
            self.params.extend([weight])

        self.weights.append(weight)
        if use_bias:
            self.biases.append(bias)

        if use_bias:
            inputs = [blob_in, weight, bias]
        else:
            inputs = [blob_in, weight]

        if transform_inputs is not None:
            transform_inputs(self, blob_out, inputs)

        # For the operator, we no longer need to provide the no_bias field
        # because it can automatically figure this out from the number of
        # inputs.
        if 'no_bias' in kwargs:
            del kwargs['no_bias']
        if group != 1:
            kwargs['group'] = group
        return self.net.Conv(
            inputs,
            blob_out,
            kernels=kernels,
            order=self.order,
            **kwargs)

    def ConvNd(self, blob_in, blob_out, dim_in, dim_out, kernel,
               weight_init=None, bias_init=None, group=1,
               transform_inputs=None, **kwargs):
        """N-dimensional convolution for inputs with NCHW storage order.
        """
        assert self.order == "NCHW", "ConvNd only supported for NCHW storage."
        return self._ConvBase(True, blob_in, blob_out, dim_in, dim_out, kernel,
                              weight_init, bias_init, group, transform_inputs,
                              **kwargs)

    def Conv(self, blob_in, blob_out, dim_in, dim_out, kernel,
             weight_init=None, bias_init=None, group=1,
             transform_inputs=None, **kwargs):
        """2-dimensional convolution.
        """
        return self._ConvBase(False, blob_in, blob_out, dim_in, dim_out,
                              kernel, weight_init, bias_init, group,
                              transform_inputs, **kwargs)

    def ConvTranspose(
        self, blob_in, blob_out, dim_in, dim_out, kernel,
        weight_init=None, bias_init=None, **kwargs
    ):
        """ConvTranspose.
        """
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        blob_out = blob_out or self.net.NextName()
        weight_shape = (
            [dim_in, dim_out, kernel, kernel]
            if self.order == "NCHW"
            else [dim_in, kernel, kernel, dim_out]
        )
        if self.init_params:
            weight = self.param_init_net.__getattr__(weight_init[0])(
                [],
                blob_out + '_w',
                shape=weight_shape,
                **weight_init[1]
            )
            bias = self.param_init_net.__getattr__(bias_init[0])(
                [],
                blob_out + '_b',
                shape=[dim_out, ],
                **bias_init[1]
            )
        else:
            weight = core.ScopedBlobReference(
                blob_out + '_w', self.param_init_net)
            bias = core.ScopedBlobReference(
                blob_out + '_b', self.param_init_net)
        self.params.extend([weight, bias])
        self.weights.append(weight)
        self.biases.append(bias)
        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit
        return self.net.ConvTranspose(
            [blob_in, weight, bias],
            blob_out,
            kernel=kernel,
            order=self.order,
            **kwargs
        )

    def GroupConv(
        self, blob_in, blob_out, dim_in, dim_out, kernel,
        weight_init=None, bias_init=None, group=1, **kwargs
    ):
        """Group Convolution.

        This is essentially the same as Conv with a group argument passed in.
        We specialize this for backward interface compatibility.
        """
        return self.Conv(blob_in, blob_out, dim_in, dim_out, kernel,
                         weight_init=weight_init, bias_init=bias_init,
                         group=group, **kwargs)
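    # Sketch of a typical Conv call (illustrative only; blob names and sizes
    # are made up). Initializers are passed as (fill operator name, kwargs)
    # tuples, and the created parameter blobs are named "<blob_out>_w" and
    # "<blob_out>_b":
    #
    #   conv1 = model.Conv(
    #       "data", "conv1", dim_in=3, dim_out=16, kernel=3, pad=1,
    #       weight_init=('XavierFill', {}),
    #       bias_init=('ConstantFill', {'value': 0.0}),
    #   )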
""" weight_init = weight_init if weight_init else ('XavierFill', {}) bias_init = bias_init if bias_init else ('ConstantFill', {}) use_bias = False if ("no_bias" in kwargs and kwargs["no_bias"]) else True if self.use_cudnn: kwargs['engine'] = 'CUDNN' kwargs['exhaustive_search'] = self.cudnn_exhaustive_search if self.ws_nbytes_limit: kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit if dim_in % group: raise ValueError("dim_in should be divisible by group.") if dim_out % group: raise ValueError("dim_out should be divisible by group.") splitted_blobs = self.net.DepthSplit( blob_in, ['_' + blob_out + '_gconv_split_' + str(i) for i in range(group)], dimensions=[int(dim_in / group) for i in range(group)], order=self.order ) weight_shape = ( [dim_out / group, dim_in / group, kernel, kernel] if self.order == "NCHW" else [dim_out / group, kernel, kernel, dim_in / group] ) # Make sure that the shapes are of int format. Especially for py3 where # int division gives float output. weight_shape = [int(v) for v in weight_shape] conv_blobs = [] for i in range(group): if self.init_params: weight = self.param_init_net.__getattr__(weight_init[0])( [], blob_out + '_gconv_%d_w' % i, shape=weight_shape, **weight_init[1] ) if use_bias: bias = self.param_init_net.__getattr__(bias_init[0])( [], blob_out + '_gconv_%d_b' % i, shape=[int(dim_out / group)], **bias_init[1] ) else: weight = core.ScopedBlobReference( blob_out + '_gconv_%d_w' % i, self.param_init_net) if use_bias: bias = core.ScopedBlobReference( blob_out + '_gconv_%d_b' % i, self.param_init_net) if use_bias: self.params.extend([weight, bias]) else: self.params.extend([weight]) self.weights.append(weight) if use_bias: self.biases.append(bias) if use_bias: inputs = [weight, bias] else: inputs = [weight] if 'no_bias' in kwargs: del kwargs['no_bias'] conv_blobs.append( splitted_blobs[i].Conv( inputs, blob_out + '_gconv_%d' % i, kernel=kernel, order=self.order, **kwargs ) ) concat, concat_dims = self.net.Concat( conv_blobs, [blob_out, "_" + blob_out + "_concat_dims"], order=self.order ) return concat def FC(self, *args, **kwargs): return model_helpers.FC(self, *args, **kwargs) def PackedFC(self, *args, **kwargs): return model_helpers.PackedFC(self, *args, **kwargs) def FC_Prune(self, *args, **kwargs): return model_helpers.FC_Prune(self, *args, **kwargs) def FC_Decomp(self, *args, **kwargs): return model_helpers.FC_Decomp(self, *args, **kwargs) def FC_Sparse(self, *args, **kwargs): return model_helpers.FC_Sparse(self, *args, **kwargs) def Dropout(self, *args, **kwargs): return model_helpers.Dropout(self, *args, **kwargs) def LRN(self, blob_in, blob_out, **kwargs): """LRN""" return self.net.LRN( blob_in, [blob_out, "_" + blob_out + "_scale"], order=self.order, **kwargs )[0] def MaxPool(self, *args, **kwargs): return model_helpers.MaxPool(self, *args, use_cudnn=self.use_cudnn, **kwargs) def AveragePool(self, *args, **kwargs): return model_helpers.AveragePool(self, *args, use_cudnn=self.use_cudnn, **kwargs) def Concat(self, blobs_in, blob_out, **kwargs): """Depth Concat.""" return self.net.Concat( blobs_in, [blob_out, "_" + blob_out + "_concat_dims"], order=self.order, **kwargs )[0] def DepthConcat(self, blobs_in, blob_out, **kwargs): """The old depth concat function - we should move to use concat.""" print("DepthConcat is deprecated. 
use Concat instead.") return self.Concat(blobs_in, blob_out, **kwargs) def PRelu(self, blob_in, blob_out, num_channels=1, slope_init=None, **kwargs): """PRelu""" slope_init = ( slope_init if slope_init else ('ConstantFill', {'value': 0.25})) if self.init_params: slope = self.param_init_net.__getattr__(slope_init[0])( [], blob_out + '_slope', shape=[num_channels], **slope_init[1] ) else: slope = core.ScopedBlobReference( blob_out + '_slope', self.param_init_net) self.params.extend([slope]) return self.net.PRelu([blob_in, slope], [blob_out]) def Relu(self, blob_in, blob_out, **kwargs): """Relu.""" if self.use_cudnn: kwargs['engine'] = 'CUDNN' return self.net.Relu(blob_in, blob_out, order=self.order, **kwargs) def Transpose(self, blob_in, blob_out, **kwargs): """Transpose.""" if self.use_cudnn: kwargs['engine'] = 'CUDNN' return self.net.Transpose(blob_in, blob_out, **kwargs) def Softmax(self, blob_in, blob_out=None, **kwargs): """Softmax.""" if self.use_cudnn: kwargs['engine'] = 'CUDNN' if blob_out is not None: return self.net.Softmax(blob_in, blob_out, **kwargs) else: return self.net.Softmax(blob_in, **kwargs) def Sum(self, blob_in, blob_out, **kwargs): """Sum""" return self.net.Sum(blob_in, blob_out, **kwargs) def InstanceNorm(self, blob_in, blob_out, dim_in, **kwargs): blob_out = blob_out or self.net.NextName() # Input: input, scale, bias # Output: output, saved_mean, saved_inv_std # scale: initialize with ones # bias: initialize with zeros def init_blob(value, suffix): return self.param_init_net.ConstantFill( [], blob_out + "_" + suffix, shape=[dim_in], value=value) scale, bias = init_blob(1.0, "s"), init_blob(0.0, "b") self.params.extend([scale, bias]) self.weights.append(scale) self.biases.append(bias) blob_outs = [blob_out, blob_out + "_sm", blob_out + "_siv"] if 'is_test' in kwargs and kwargs['is_test']: blob_outputs = self.net.InstanceNorm( [blob_in, scale, bias], [blob_out], order=self.order, **kwargs) return blob_outputs else: blob_outputs = self.net.InstanceNorm( [blob_in, scale, bias], blob_outs, order=self.order, **kwargs) # Return the output return blob_outputs[0] def SpatialBN(self, blob_in, blob_out, dim_in, **kwargs): blob_out = blob_out or self.net.NextName() # Input: input, scale, bias, est_mean, est_inv_var # Output: output, running_mean, running_inv_var, saved_mean, # saved_inv_var # scale: initialize with ones # bias: initialize with zeros # est mean: zero # est var: ones def init_blob(value, suffix): return self.param_init_net.ConstantFill( [], blob_out + "_" + suffix, shape=[dim_in], value=value) if self.init_params: scale, bias = init_blob(1.0, "s"), init_blob(0.0, "b") running_mean = init_blob(0.0, "rm") running_inv_var = init_blob(1.0, "riv") else: scale = core.ScopedBlobReference( blob_out + '_s', self.param_init_net) bias = core.ScopedBlobReference( blob_out + '_b', self.param_init_net) running_mean = core.ScopedBlobReference( blob_out + '_rm', self.param_init_net) running_inv_var = core.ScopedBlobReference( blob_out + '_riv', self.param_init_net) self.params.extend([scale, bias]) self.computed_params.extend([running_mean, running_inv_var]) self.weights.append(scale) self.biases.append(bias) blob_outs = [blob_out, running_mean, running_inv_var, blob_out + "_sm", blob_out + "_siv"] if 'is_test' in kwargs and kwargs['is_test']: blob_outputs = self.net.SpatialBN( [blob_in, scale, bias, blob_outs[1], blob_outs[2]], [blob_out], order=self.order, **kwargs) return blob_outputs else: blob_outputs = self.net.SpatialBN( [blob_in, scale, bias, blob_outs[1], blob_outs[2]], 
    def Iter(self, blob_out, **kwargs):
        if 'device_option' in kwargs:
            del kwargs['device_option']
        self.param_init_net.ConstantFill(
            [], blob_out, shape=[1], value=0, dtype=core.DataType.INT64,
            device_option=core.DeviceOption(caffe2_pb2.CPU, 0),
            **kwargs)
        return self.net.Iter(blob_out, blob_out, **kwargs)

    def Accuracy(self, blob_in, blob_out, **kwargs):
        dev = kwargs['device_option'] if 'device_option' in kwargs \
            else scope.CurrentDeviceScope()
        is_cpu = dev is None or dev.device_type == caffe2_pb2.CPU

        # We support top_k > 1 only on CPU
        if not is_cpu and 'top_k' in kwargs and kwargs['top_k'] > 1:
            pred_host = self.net.CopyGPUToCPU(
                blob_in[0], blob_in[0] + "_host")
            label_host = self.net.CopyGPUToCPU(
                blob_in[1], blob_in[1] + "_host")

            # Now use the Host version of the accuracy op
            self.net.Accuracy(
                [pred_host, label_host],
                blob_out,
                device_option=core.DeviceOption(caffe2_pb2.CPU, 0),
                **kwargs)
        else:
            # Forward the remaining arguments (e.g. top_k) to the op as well.
            self.net.Accuracy(blob_in, blob_out, **kwargs)

    def PadImage(self, blob_in, blob_out, **kwargs):
        self.net.PadImage(blob_in, blob_out, **kwargs)

    @property
    def XavierInit(self):
        return ('XavierFill', {})

    def ConstantInit(self, value):
        return ('ConstantFill', dict(value=value))

    @property
    def MSRAInit(self):
        return ('MSRAFill', {})

    @property
    def ZeroInit(self):
        return ('ConstantFill', {})

    def AddWeightDecay(self, weight_decay):
        """Adds a decay to weights in the model.

        This is a form of L2 regularization.

        Args:
            weight_decay: strength of the regularization
        """
        if weight_decay <= 0.0:
            return
        wd = self.param_init_net.ConstantFill([], 'wd', shape=[1],
                                              value=weight_decay)
        ONE = self.param_init_net.ConstantFill([], "ONE", shape=[1],
                                               value=1.0)
        for param in self.GetWeights():
            # Equivalent to: grad += wd * param
            grad = self.param_to_grad[param]
            self.net.WeightedSum(
                [grad, ONE, param, wd],
                grad,
            )

    @property
    def CPU(self):
        device_option = caffe2_pb2.DeviceOption()
        device_option.device_type = caffe2_pb2.CPU
        return device_option

    @property
    def GPU(self, gpu_id=0):
        device_option = caffe2_pb2.DeviceOption()
        device_option.device_type = caffe2_pb2.CUDA
        device_option.cuda_gpu_id = gpu_id
        return device_option
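

if __name__ == "__main__":
    # Illustrative sketch only (not part of the original module): build a tiny
    # network graph to show how the helper wires the initializer net and the
    # main net together. The blob names and dimensions below are made up, and
    # nothing is run against real data; we only print the generated protos.
    model = CNNModelHelper(order="NCHW", name="cnn_example")
    conv1 = model.Conv("data", "conv1", dim_in=1, dim_out=8, kernel=5)
    relu1 = model.Relu(conv1, "relu1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=2, stride=2)
    # dim_in here assumes a 28x28 input image; adjust for other sizes.
    fc1 = model.FC(pool1, "fc1", dim_in=8 * 12 * 12, dim_out=10)
    pred = model.Softmax(fc1, "pred")
    print(model.param_init_net.Proto())
    print(model.net.Proto())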