pytorch/caffe2/python/cnn.py
2016-05-13 14:43:48 -07:00

307 lines
10 KiB
Python

from caffe2.python import core
from caffe2.proto import caffe2_pb2
import logging
class CNNModelHelper(object):
"""A helper model so we can write CNN models more easily, without having to
manually define parameter initializations and operators separately.
"""
def __init__(self, order="NCHW", name=None,
use_cudnn=True, cudnn_exhaustive_search=False,
ws_nbytes_limit=None):
if name is None:
name = "CNN"
self.net = core.Net(name)
self.param_init_net = core.Net(name + '_init')
self.params = []
self.param_to_grad = {}
self.weights = []
self.biases = []
self.order = order
self.use_cudnn = use_cudnn
self.cudnn_exhaustive_search = cudnn_exhaustive_search
self.ws_nbytes_limit = ws_nbytes_limit
if self.order != "NHWC" and self.order != "NCHW":
raise ValueError(
"Cannot understand the CNN storage order %s." % self.order
)
def Proto(self):
return self.net.Proto()
def CreateDB(self, blob_out, db, db_type, **kwargs):
dbreader = self.param_init_net.CreateDB(
[], blob_out, db=db, db_type=db_type, **kwargs)
return dbreader
def ImageInput(
self, blob_in, blob_out, **kwargs
):
"""Image Input."""
if self.order == "NCHW":
data, label = self.net.ImageInput(
blob_in, [blob_out[0] + '_nhwc', blob_out[1]], **kwargs)
data = self.net.NHWC2NCHW(data, blob_out[0])
else:
data, label = self.net.ImageInput(
blob_in, blob_out, **kwargs)
return data, label
def TensorProtosDBInput(
self, unused_blob_in, blob_out, batch_size, db, db_type, **kwargs
):
"""TensorProtosDBInput."""
dbreader_name = "dbreader_" + db
dbreader = self.param_init_net.CreateDB([], dbreader_name,
db=db, db_type=db_type)
return self.net.TensorProtosDBInput(
dbreader, blob_out, batch_size=batch_size)
def Conv(
self, blob_in, blob_out, dim_in, dim_out, kernel, weight_init=None,
bias_init=None, **kwargs
):
"""Convolution. We intentionally do not provide odd kernel/stride/pad
settings in order to discourage the use of odd cases.
"""
weight_init = weight_init if weight_init else ('XavierFill', {})
bias_init = bias_init if bias_init else ('ConstantFill', {})
blob_out = blob_out or self.net.NextName()
weight_shape = (
[dim_out, dim_in, kernel, kernel]
if self.order == "NCHW" else [dim_out, kernel, kernel, dim_in]
)
weight = self.param_init_net.__getattr__(weight_init[0])(
[],
blob_out + '_w',
shape=weight_shape,
**weight_init[1]
)
bias = self.param_init_net.__getattr__(bias_init[0])(
[],
blob_out + '_b',
shape=[dim_out, ],
**bias_init[1]
)
self.params.extend([weight, bias])
self.weights.append(weight)
self.biases.append(bias)
if self.use_cudnn:
kwargs['engine'] = 'CUDNN'
kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
if self.ws_nbytes_limit:
kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit
return self.net.Conv(
[blob_in, weight, bias],
blob_out,
kernel=kernel,
order=self.order,
**kwargs
)
def GroupConv(
self,
blob_in,
blob_out,
dim_in,
dim_out,
kernel,
weight_init,
bias_init,
group=1,
**kwargs
):
"""Convolution. We intentionally do not provide odd kernel/stride/pad
settings in order to discourage the use of odd cases.
"""
if self.use_cudnn:
kwargs['engine'] = 'CUDNN'
kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
if self.ws_nbytes_limit:
kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit
if dim_in % group:
raise ValueError("dim_in should be divisible by group.")
splitted_blobs = self.net.DepthSplit(
blob_in,
['_' + blob_out + '_gconv_split_' + str(i) for i in range(group)],
dimensions=[dim_in / group for i in range(group)],
order=self.order
)
weight_shape = (
[dim_out / group, dim_in / group, kernel, kernel]
if self.order == "NCHW" else
[dim_out / group, kernel, kernel, dim_in / group]
)
conv_blobs = []
for i in range(group):
weight = self.param_init_net.__getattr__(weight_init[0])(
[],
blob_out + '_gconv_%d_w' % i,
shape=weight_shape,
**weight_init[1]
)
bias = self.param_init_net.__getattr__(bias_init[0])(
[],
blob_out + '_gconv_%d_b' % i,
shape=[dim_out / group],
**bias_init[1]
)
self.params.extend([weight, bias])
self.weights.append(weight)
self.biases.append(bias)
conv_blobs.append(
splitted_blobs[i].Conv(
[weight, bias],
blob_out + '_gconv_%d' % i,
kernel=kernel,
order=self.order,
**kwargs
)
)
concat, concat_dims = self.net.DepthConcat(
conv_blobs,
[blob_out, "_" + blob_out + "_concat_dims"],
order=self.order
)
return concat
def FC(
self, blob_in, blob_out, dim_in, dim_out, weight_init=None,
bias_init=None, **kwargs
):
"""FC"""
weight_init = weight_init if weight_init else ('XavierFill', {})
bias_init = bias_init if bias_init else ('ConstantFill', {})
blob_out = blob_out or self.net.NextName()
weight = self.param_init_net.__getattr__(weight_init[0])(
[],
blob_out + '_w',
shape=[dim_out, dim_in],
**weight_init[1]
)
bias = self.param_init_net.__getattr__(bias_init[0])(
[],
blob_out + '_b',
shape=[dim_out, ],
**bias_init[1]
)
self.params.extend([weight, bias])
return self.net.FC([blob_in, weight, bias], blob_out, **kwargs)
def LRN(self, blob_in, blob_out, **kwargs):
"""LRN"""
return self.net.LRN(
blob_in,
[blob_out, "_" + blob_out + "_scale"],
order=self.order,
**kwargs
)[0]
def Dropout(self, blob_in, blob_out, **kwargs):
"""Dropout"""
return self.net.Dropout(
blob_in, [blob_out, "_" + blob_out + "_mask"], **kwargs
)[0]
def MaxPool(self, blob_in, blob_out, **kwargs):
"""Max pooling"""
if self.use_cudnn:
kwargs['engine'] = 'CUDNN'
return self.net.MaxPool(blob_in, blob_out, order=self.order, **kwargs)
def AveragePool(self, blob_in, blob_out, **kwargs):
"""Average pooling"""
if self.use_cudnn:
kwargs['engine'] = 'CUDNN'
return self.net.AveragePool(
blob_in,
blob_out,
order=self.order,
**kwargs
)
def DepthConcat(self, blobs_in, blob_out, **kwargs):
"""Depth Concat."""
return self.net.DepthConcat(
blobs_in,
[blob_out, "_" + blob_out + "_condat_dims"],
order=self.order
)[0]
def Relu(self, blob_in, blob_out, **kwargs):
"""Relu."""
if self.use_cudnn:
kwargs['engine'] = 'CUDNN'
return self.net.Relu(blob_in, blob_out, order=self.order, **kwargs)
def SpatialBN(self, blob_in, blob_out, dim_in, **kwargs):
blob_out = blob_out or self.net.NextName()
# Input: input, scale, bias, est_mean, est_inv_var
# Output: output, running_mean, running_inv_var, saved_mean, saved_inv_var
# scale: initialize with ones
# bias: initialize with zeros
# est mean: zero
# est var: ones
def init_blob(value, suffix):
return self.param_init_net.ConstantFill(
[], blob_out + "_" + suffix, shape=[dim_in], value=value)
scale, bias = init_blob(1.0, "s"), init_blob(0.0, "b")
self.params.extend([scale, bias])
self.weights.append(scale)
self.biases.append(bias)
blob_outs = [blob_out, blob_out + "_rm", blob_out + "_riv",
blob_out + "_sm", blob_out + "_siv"]
blob_outputs = self.net.SpatialBN(
[blob_in, scale, bias], blob_outs,
order=self.order, **kwargs)
# Return the output
return blob_outputs[0]
@property
def XavierInit(self):
return ('XavierFill', {})
@property
def MSRAInit(self):
return ('MSRAFill', {})
@property
def ZeroInit(self):
return ('ConstantFill', {})
def AddGradientOperators(self, **kwargs):
grad_map = self.net.AddGradientOperators(**kwargs)
for p in self.params:
if str(p) in grad_map:
self.param_to_grad[p] = grad_map[str(p)]
return grad_map
@property
def CPU(self):
device_option = caffe2_pb2.DeviceOption()
device_option.device_type = caffe2_pb2.CPU
return device_option
@property
def GPU(self, gpu_id=0):
device_option = caffe2_pb2.DeviceOption()
device_option.device_type = caffe2_pb2.CUDA
device_option.cuda_gpu_id = gpu_id
return device_option
def __getattr__(self, op_type):
"""Catch-all for all other operators, mostly those without params."""
if not core.IsOperator(op_type):
raise RuntimeError(
'Method ' + op_type + ' is not a registered operator.'
)
logging.warning("You are creating an op that the CNNModelHelper "
"does not recognize: {}.".format(op_type))
return self.net.__getattr__(op_type)