# pytorch/caffe2/python/cnn.py

from caffe2.python import core
from caffe2.proto import caffe2_pb2

import logging

class CNNModelHelper(object):
    """A helper model so we can write CNN models more easily, without having to
    manually define parameter initializations and operators separately.
    """

    def __init__(self, order="NCHW", name=None,
                 use_cudnn=True, cudnn_exhaustive_search=False,
                 ws_nbytes_limit=None):
        """Create the main net, the parameter-init net and the bookkeeping.

        Args:
            order: storage order string; must be "NCHW" or "NHWC".
            name: name of the main net, "CNN" when None. The init net is
                named ``name + '_init'``.
            use_cudnn: stored on the instance; the layer helpers of this
                class read it to decide whether to pass engine='CUDNN'.
            cudnn_exhaustive_search: stored on the instance; forwarded as
                ``exhaustive_search`` by the convolution helpers.
            ws_nbytes_limit: stored on the instance; forwarded by the
                convolution helpers when truthy.

        Raises:
            ValueError: if ``order`` is neither "NHWC" nor "NCHW". Note that
                both nets have already been created at that point.
        """
        net_name = "CNN" if name is None else name
        self.net = core.Net(net_name)
        self.param_init_net = core.Net(net_name + '_init')

        # Parameter bookkeeping filled in by the layer helpers.
        self.params = []
        self.param_to_grad = {}
        self.weights = []
        self.biases = []

        # Configuration shared by the layer helpers.
        self.order = order
        self.use_cudnn = use_cudnn
        self.cudnn_exhaustive_search = cudnn_exhaustive_search
        self.ws_nbytes_limit = ws_nbytes_limit

        if self.order not in ("NHWC", "NCHW"):
            raise ValueError(
                "Cannot understand the CNN storage order %s." % self.order
            )

    def Proto(self):
        """Return whatever ``self.net.Proto()`` returns for the main net."""
        main_net = self.net
        return main_net.Proto()

    def CreateDB(self, blob_out, db, db_type, **kwargs):
        """Add a CreateDB op to the parameter-init net and return its result.

        The op is created with no inputs, ``blob_out`` as output, and
        ``db``/``db_type`` plus any extra keyword arguments as op arguments.
        """
        create_db = self.param_init_net.CreateDB
        return create_db([], blob_out, db=db, db_type=db_type, **kwargs)


    def ImageInput(
            self, blob_in, blob_out, **kwargs
    ):
        """Add an ImageInput op and return its ``(data, label)`` outputs.

        ``blob_out`` is indexed as ``[data_name, label_name]``. When the
        helper's order is "NCHW", the ImageInput data output is written to
        ``data_name + '_nhwc'`` and then passed through an NHWC2NCHW op whose
        output is ``data_name``; otherwise the ImageInput outputs are
        returned directly.
        """
        if self.order != "NCHW":
            images, labels = self.net.ImageInput(blob_in, blob_out, **kwargs)
            return images, labels

        # Read into an intermediate blob, then convert it to NCHW.
        intermediate_outputs = [blob_out[0] + '_nhwc', blob_out[1]]
        nhwc_images, labels = self.net.ImageInput(
            blob_in, intermediate_outputs, **kwargs)
        images = self.net.NHWC2NCHW(nhwc_images, blob_out[0])
        return images, labels


    def TensorProtosDBInput(
        self, unused_blob_in, blob_out, batch_size, db, db_type, **kwargs
    ):
        """Create a DB reader and a TensorProtosDBInput op that reads from it.

        A CreateDB op named ``"dbreader_" + db`` is added to the
        parameter-init net; its result is the input of a TensorProtosDBInput
        op on the main net, whose result is returned.

        ``unused_blob_in`` and any extra ``**kwargs`` are accepted but not
        used by this method.
        """
        reader_blob = self.param_init_net.CreateDB(
            [], "dbreader_" + db, db=db, db_type=db_type)
        read_op = self.net.TensorProtosDBInput
        return read_op(reader_blob, blob_out, batch_size=batch_size)

    def Conv(
        self, blob_in, blob_out, dim_in, dim_out, kernel, weight_init=None,
        bias_init=None, **kwargs
    ):
        """Add a square-kernel convolution together with its parameters.

        Only a single ``kernel`` size is taken here; any other op arguments
        are passed through ``**kwargs`` unchanged.

        Args:
            blob_in: input blob of the Conv op.
            blob_out: output blob name; when falsy, ``self.net.NextName()``
                is used. Parameters are named ``blob_out + '_w'`` / ``'_b'``.
            dim_in, dim_out: input / output channel counts used to build
                the weight shape and the bias shape ``[dim_out]``.
            kernel: kernel size, used for both spatial dims of the weight.
            weight_init, bias_init: ``(fill_op_name, fill_op_kwargs)`` pairs;
                default to ('XavierFill', {}) and ('ConstantFill', {}).

        Returns:
            The result of ``self.net.Conv``.
        """
        weight_init = weight_init or ('XavierFill', {})
        bias_init = bias_init or ('ConstantFill', {})
        blob_out = blob_out or self.net.NextName()

        # The weight layout follows the helper's storage order.
        if self.order == "NCHW":
            weight_shape = [dim_out, dim_in, kernel, kernel]
        else:
            weight_shape = [dim_out, kernel, kernel, dim_in]

        # The fill ops live on the init net and are looked up by name.
        make_weight = self.param_init_net.__getattr__(weight_init[0])
        weight = make_weight(
            [], blob_out + '_w', shape=weight_shape, **weight_init[1])
        make_bias = self.param_init_net.__getattr__(bias_init[0])
        bias = make_bias(
            [], blob_out + '_b', shape=[dim_out, ], **bias_init[1])

        self.params.extend([weight, bias])
        self.weights.append(weight)
        self.biases.append(bias)

        if self.use_cudnn:
            kwargs.update(
                engine='CUDNN',
                exhaustive_search=self.cudnn_exhaustive_search,
            )
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit

        conv_inputs = [blob_in, weight, bias]
        return self.net.Conv(
            conv_inputs, blob_out, kernel=kernel, order=self.order, **kwargs)

    def GroupConv(
        self,
        blob_in,
        blob_out,
        dim_in,
        dim_out,
        kernel,
        weight_init=None,
        bias_init=None,
        group=1,
        **kwargs
    ):
        """Grouped convolution built from DepthSplit, per-group Conv and
        DepthConcat.

        The input is split along depth into ``group`` pieces of
        ``dim_in // group`` channels each. Every piece gets its own Conv op
        with its own weight and bias, and the per-group outputs are
        concatenated into ``blob_out``.

        Args:
            blob_in: input blob.
            blob_out: output blob name; also the prefix of the intermediate
                and parameter blob names.
            dim_in, dim_out: total input / output channel counts; both must
                be divisible by ``group``.
            kernel: kernel size, used for both spatial dims of the weights.
            weight_init, bias_init: ``(fill_op_name, fill_op_kwargs)`` pairs;
                default to ('XavierFill', {}) and ('ConstantFill', {}), the
                same defaults ``Conv`` uses.
            group: number of groups.
            **kwargs: extra arguments passed to every per-group Conv op.

        Returns:
            The concatenated output (first output of DepthConcat).

        Raises:
            ValueError: if ``dim_in`` or ``dim_out`` is not divisible by
                ``group``.
        """
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit
        if dim_in % group:
            raise ValueError("dim_in should be divisible by group.")
        if dim_out % group:
            raise ValueError("dim_out should be divisible by group.")
        # Floor division keeps the per-group sizes integral; plain "/" yields
        # floats under Python 3, which are not valid shape entries.
        group_dim_in = dim_in // group
        group_dim_out = dim_out // group
        splitted_blobs = self.net.DepthSplit(
            blob_in,
            ['_' + blob_out + '_gconv_split_' + str(i) for i in range(group)],
            dimensions=[group_dim_in] * group,
            order=self.order
        )
        weight_shape = (
            [group_dim_out, group_dim_in, kernel, kernel]
            if self.order == "NCHW" else
            [group_dim_out, kernel, kernel, group_dim_in]
        )
        conv_blobs = []
        for i in range(group):
            weight = self.param_init_net.__getattr__(weight_init[0])(
                [],
                blob_out + '_gconv_%d_w' % i,
                shape=weight_shape,
                **weight_init[1]
            )
            bias = self.param_init_net.__getattr__(bias_init[0])(
                [],
                blob_out + '_gconv_%d_b' % i,
                shape=[group_dim_out],
                **bias_init[1]
            )
            self.params.extend([weight, bias])
            self.weights.append(weight)
            self.biases.append(bias)
            conv_blobs.append(
                splitted_blobs[i].Conv(
                    [weight, bias],
                    blob_out + '_gconv_%d' % i,
                    kernel=kernel,
                    order=self.order,
                    **kwargs
                )
            )
        # DepthConcat has a second output, which is not needed here.
        concat, _concat_dims = self.net.DepthConcat(
            conv_blobs,
            [blob_out, "_" + blob_out + "_concat_dims"],
            order=self.order
        )
        return concat

    def FC(
        self, blob_in, blob_out, dim_in, dim_out, weight_init=None,
        bias_init=None, **kwargs
    ):
        """Add a fully connected layer together with its parameters.

        Args:
            blob_in: input blob of the FC op.
            blob_out: output blob name; when falsy, ``self.net.NextName()``
                is used. Parameters are named ``blob_out + '_w'`` / ``'_b'``.
            dim_in, dim_out: the weight is created with shape
                ``[dim_out, dim_in]`` and the bias with shape ``[dim_out]``.
            weight_init, bias_init: ``(fill_op_name, fill_op_kwargs)`` pairs;
                default to ('XavierFill', {}) and ('ConstantFill', {}).
            **kwargs: extra arguments passed to the FC op.

        Returns:
            The result of ``self.net.FC``.
        """
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        blob_out = blob_out or self.net.NextName()
        weight = self.param_init_net.__getattr__(weight_init[0])(
            [],
            blob_out + '_w',
            shape=[dim_out, dim_in],
            **weight_init[1]
        )
        bias = self.param_init_net.__getattr__(bias_init[0])(
            [],
            blob_out + '_b',
            shape=[dim_out, ],
            **bias_init[1]
        )
        self.params.extend([weight, bias])
        # Track weight and bias separately as well, as the other
        # parameterised layers of this helper do.
        self.weights.append(weight)
        self.biases.append(bias)
        return self.net.FC([blob_in, weight, bias], blob_out, **kwargs)

    def LRN(self, blob_in, blob_out, **kwargs):
        """Add an LRN op and return its first output.

        The op is given two output names, ``blob_out`` and
        ``"_" + blob_out + "_scale"``; only the first one is returned.
        """
        output_names = [blob_out, "_" + blob_out + "_scale"]
        lrn_outputs = self.net.LRN(
            blob_in, output_names, order=self.order, **kwargs)
        return lrn_outputs[0]

    def Dropout(self, blob_in, blob_out, **kwargs):
        """Add a Dropout op and return its first output.

        The op is given two output names, ``blob_out`` and
        ``"_" + blob_out + "_mask"``; only the first one is returned.
        """
        output_names = [blob_out, "_" + blob_out + "_mask"]
        dropout_outputs = self.net.Dropout(blob_in, output_names, **kwargs)
        return dropout_outputs[0]

    def MaxPool(self, blob_in, blob_out, **kwargs):
        """Add a MaxPool op using the helper's storage order.

        When ``self.use_cudnn`` is true, ``engine`` is set to 'CUDNN',
        replacing any ``engine`` given by the caller.
        """
        if self.use_cudnn:
            kwargs.update(engine='CUDNN')
        max_pool = self.net.MaxPool
        return max_pool(blob_in, blob_out, order=self.order, **kwargs)

    def AveragePool(self, blob_in, blob_out, **kwargs):
        """Add an AveragePool op using the helper's storage order.

        When ``self.use_cudnn`` is true, ``engine`` is set to 'CUDNN',
        replacing any ``engine`` given by the caller.
        """
        if self.use_cudnn:
            kwargs.update(engine='CUDNN')
        average_pool = self.net.AveragePool
        return average_pool(blob_in, blob_out, order=self.order, **kwargs)

    def DepthConcat(self, blobs_in, blob_out, **kwargs):
        """Add a DepthConcat op and return its first output.

        The op is given two output names, ``blob_out`` and
        ``"_" + blob_out + "_concat_dims"``; only the first one is returned.
        Extra keyword arguments are forwarded to the op.
        """
        return self.net.DepthConcat(
            blobs_in,
            # Same auxiliary-blob suffix that GroupConv uses.
            [blob_out, "_" + blob_out + "_concat_dims"],
            order=self.order,
            **kwargs
        )[0]

    def Relu(self, blob_in, blob_out, **kwargs):
        """Add a Relu op, passing the helper's storage order along.

        When ``self.use_cudnn`` is true, ``engine`` is set to 'CUDNN',
        replacing any ``engine`` given by the caller.
        """
        if self.use_cudnn:
            kwargs.update(engine='CUDNN')
        relu = self.net.Relu
        return relu(blob_in, blob_out, order=self.order, **kwargs)

    def SpatialBN(self, blob_in, blob_out, dim_in, **kwargs):
        """Add a SpatialBN op with freshly initialised scale and bias.

        Args:
            blob_in: input blob.
            blob_out: output blob name; when falsy, ``self.net.NextName()``
                is used. It is also the prefix of all auxiliary blob names.
            dim_in: length of the scale and bias vectors.
            **kwargs: extra arguments passed to the SpatialBN op.

        Returns:
            The first output of the SpatialBN op.
        """
        blob_out = blob_out or self.net.NextName()

        # The op is wired with three inputs (input, scale, bias) and five
        # outputs: output, running mean, running inverse variance, saved
        # mean and saved inverse variance. No estimated mean/variance blobs
        # are created or passed here.
        # Scale starts at one, bias at zero.
        scale = self.param_init_net.ConstantFill(
            [], blob_out + "_" + "s", shape=[dim_in], value=1.0)
        bias = self.param_init_net.ConstantFill(
            [], blob_out + "_" + "b", shape=[dim_in], value=0.0)

        self.params.extend([scale, bias])
        self.weights.append(scale)
        self.biases.append(bias)

        aux_suffixes = ("_rm", "_riv", "_sm", "_siv")
        output_names = [blob_out] + [blob_out + sfx for sfx in aux_suffixes]
        bn_outputs = self.net.SpatialBN(
            [blob_in, scale, bias], output_names,
            order=self.order, **kwargs)
        # Only the normalised output is handed back to the caller.
        return bn_outputs[0]

    @property
    def XavierInit(self):
        """Initializer spec ``(op_name, op_kwargs)`` naming 'XavierFill'."""
        fill_op, fill_kwargs = 'XavierFill', {}
        return (fill_op, fill_kwargs)

    @property
    def MSRAInit(self):
        """Initializer spec ``(op_name, op_kwargs)`` naming 'MSRAFill'."""
        fill_op, fill_kwargs = 'MSRAFill', {}
        return (fill_op, fill_kwargs)

    @property
    def ZeroInit(self):
        """Initializer spec ``(op_name, op_kwargs)`` naming 'ConstantFill'.

        No fill value is given; presumably this relies on ConstantFill's
        default value being zero -- confirm against the operator.
        """
        fill_op, fill_kwargs = 'ConstantFill', {}
        return (fill_op, fill_kwargs)

    def AddGradientOperators(self, **kwargs):
        """Add gradient operators to the main net and record parameter grads.

        Calls ``self.net.AddGradientOperators(**kwargs)`` and, for every
        registered parameter whose string form is a key of the returned map,
        stores the mapped gradient in ``self.param_to_grad``.

        Returns:
            The map returned by ``self.net.AddGradientOperators``.
        """
        grad_map = self.net.AddGradientOperators(**kwargs)
        for param in self.params:
            param_name = str(param)
            if param_name not in grad_map:
                continue
            self.param_to_grad[param] = grad_map[param_name]
        return grad_map

    @property
    def CPU(self):
        """A new ``DeviceOption`` whose ``device_type`` is ``caffe2_pb2.CPU``."""
        return caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)

    @property
    def GPU(self, gpu_id=0):
        """A new ``DeviceOption`` with ``device_type`` ``caffe2_pb2.CUDA``.

        NOTE(review): this is a property, so attribute access invokes it
        with ``self`` only and ``gpu_id`` always takes its default of 0;
        callers cannot select another GPU through this accessor.
        """
        return caffe2_pb2.DeviceOption(
            device_type=caffe2_pb2.CUDA,
            cuda_gpu_id=gpu_id,
        )

    def __getattr__(self, op_type):
        """Catch-all for all other operators, mostly those without params."""
        if not core.IsOperator(op_type):
            raise RuntimeError(
                'Method ' + op_type + ' is not a registered operator.'
            )
        logging.warning("You are creating an op that the CNNModelHelper "
                        "does not recognize: {}.".format(op_type))
        return self.net.__getattr__(op_type)