## @package cnn
# Module caffe2.python.cnn
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, scope, model_helpers
from caffe2.python.model_helper import ModelHelperBase
from caffe2.proto import caffe2_pb2


class CNNModelHelper(ModelHelperBase):
    """A helper model so we can write CNN models more easily, without having
    to manually define parameter initializations and operators separately.
    """

    def __init__(self, order="NCHW", name=None,
                 use_cudnn=True, cudnn_exhaustive_search=False,
                 ws_nbytes_limit=None, init_params=True,
                 skip_sparse_optim=False,
                 param_model=None):
        super(CNNModelHelper, self).__init__(
            skip_sparse_optim=skip_sparse_optim,
            name="CNN" if name is None else name,
            init_params=init_params,
            param_model=param_model,
        )

        self.order = order
        self.use_cudnn = use_cudnn
        self.cudnn_exhaustive_search = cudnn_exhaustive_search
        self.ws_nbytes_limit = ws_nbytes_limit
        if self.order != "NHWC" and self.order != "NCHW":
            raise ValueError(
                "Cannot understand the CNN storage order %s." % self.order
            )

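    # Illustrative usage sketch (not part of the original module). Blob names
    # such as 'data', 'conv1', and 'pred' are hypothetical, and the FC input
    # size assumes a 28x28 image (conv 5x5 -> 24x24, 2x2/2 pool -> 12x12):
    #
    #     model = CNNModelHelper(order="NCHW", name="example")
    #     conv1 = model.Conv('data', 'conv1', dim_in=3, dim_out=16, kernel=5)
    #     relu1 = model.Relu(conv1, 'relu1')
    #     pool1 = model.MaxPool(relu1, 'pool1', kernel=2, stride=2)
    #     pred = model.FC(pool1, 'pred', dim_in=16 * 12 * 12, dim_out=10)
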
    def GetWeights(self, namescope=None):
        if namescope is None:
            namescope = scope.CurrentNameScope()

        if namescope == '':
            return self.weights[:]
        else:
            return [w for w in self.weights if w.GetNameScope() == namescope]

    def GetBiases(self, namescope=None):
        if namescope is None:
            namescope = scope.CurrentNameScope()

        if namescope == '':
            return self.biases[:]
        else:
            return [b for b in self.biases if b.GetNameScope() == namescope]

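    # Sketch of the name-scope filtering above (an assumption for
    # illustration: parameters created under scope.NameScope("tower_0") carry
    # the "tower_0/" prefix, so they can be retrieved per scope):
    #
    #     with scope.NameScope("tower_0"):
    #         model.Conv('data', 'conv1', dim_in=3, dim_out=16, kernel=3)
    #     tower_weights = model.GetWeights("tower_0/")
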
    def ImageInput(
            self, blob_in, blob_out, use_gpu_transform=False, **kwargs
    ):
        """Image Input."""
        if self.order == "NCHW":
            if use_gpu_transform:
                kwargs['use_gpu_transform'] = 1 if use_gpu_transform else 0
                # GPU transform will handle NHWC -> NCHW
                data, label = self.net.ImageInput(
                    blob_in, [blob_out[0], blob_out[1]], **kwargs)
            else:
                data, label = self.net.ImageInput(
                    blob_in, [blob_out[0] + '_nhwc', blob_out[1]], **kwargs)
                data = self.net.NHWC2NCHW(data, blob_out[0])
        else:
            data, label = self.net.ImageInput(
                blob_in, blob_out, **kwargs)
        return data, label

    def _ConvBase(  # noqa
        self, is_nd, blob_in, blob_out, dim_in, dim_out, kernel,
        weight_init=None, bias_init=None, group=1, transform_inputs=None,
        **kwargs
    ):
        kernels = []
        if is_nd:
            if not isinstance(kernel, list):
                kernels = [kernel]
            else:
                kernels = kernel
        else:
            kernels = [kernel] * 2

        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit

        use_bias =\
            False if ("no_bias" in kwargs and kwargs["no_bias"]) else True
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        blob_out = blob_out or self.net.NextName()
        weight_shape = [dim_out]
        if self.order == "NCHW":
            weight_shape.append(int(dim_in / group))
            weight_shape.extend(kernels)
        else:
            weight_shape.extend(kernels)
            weight_shape.append(int(dim_in / group))

        if self.init_params:
            weight = self.param_init_net.__getattr__(weight_init[0])(
                [],
                blob_out + '_w',
                shape=weight_shape,
                **weight_init[1]
            )
            if use_bias:
                bias = self.param_init_net.__getattr__(bias_init[0])(
                    [],
                    blob_out + '_b',
                    shape=[dim_out, ],
                    **bias_init[1]
                )
        else:
            weight = core.ScopedBlobReference(
                blob_out + '_w', self.param_init_net)
            if use_bias:
                bias = core.ScopedBlobReference(
                    blob_out + '_b', self.param_init_net)
        if use_bias:
            self.params.extend([weight, bias])
        else:
            self.params.extend([weight])

        self.weights.append(weight)

        if use_bias:
            self.biases.append(bias)

        if use_bias:
            inputs = [blob_in, weight, bias]
        else:
            inputs = [blob_in, weight]

        if transform_inputs is not None:
            transform_inputs(self, blob_out, inputs)

        # For the operator, we no longer need to provide the no_bias field
        # because it can automatically figure this out from the number of
        # inputs.
        if 'no_bias' in kwargs:
            del kwargs['no_bias']
        if group != 1:
            kwargs['group'] = group
        return self.net.Conv(
            inputs,
            blob_out,
            kernels=kernels,
            order=self.order,
            **kwargs)

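    # For reference, the weight layout built above (with illustrative sizes
    # dim_in=64, dim_out=128, kernel=3, group=1):
    #   order == "NCHW": [dim_out, dim_in / group] + kernels -> [128, 64, 3, 3]
    #   order == "NHWC": [dim_out] + kernels + [dim_in / group] -> [128, 3, 3, 64]
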
    def ConvNd(self, blob_in, blob_out, dim_in, dim_out, kernel,
               weight_init=None, bias_init=None, group=1,
               transform_inputs=None, **kwargs):
        """N-dimensional convolution for inputs with NCHW storage order.
        """
        assert self.order == "NCHW", "ConvNd only supported for NCHW storage."
        return self._ConvBase(True, blob_in, blob_out, dim_in, dim_out,
                              kernel, weight_init, bias_init, group,
                              transform_inputs, **kwargs)

    def Conv(self, blob_in, blob_out, dim_in, dim_out, kernel,
             weight_init=None, bias_init=None, group=1,
             transform_inputs=None, **kwargs):
        """2-dimensional convolution.
        """
        return self._ConvBase(False, blob_in, blob_out, dim_in, dim_out,
                              kernel, weight_init, bias_init, group,
                              transform_inputs, **kwargs)

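    # Illustrative call (hypothetical blob names). Extra keyword arguments
    # such as stride and pad are forwarded through **kwargs to the underlying
    # Conv operator:
    #
    #     conv1 = model.Conv('data', 'conv1', dim_in=3, dim_out=16, kernel=3,
    #                        stride=1, pad=1)
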
    def ConvTranspose(
        self, blob_in, blob_out, dim_in, dim_out, kernel, weight_init=None,
        bias_init=None, **kwargs
    ):
        """Transposed convolution (ConvTranspose).
        """
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        blob_out = blob_out or self.net.NextName()
        weight_shape = (
            [dim_in, dim_out, kernel, kernel]
            if self.order == "NCHW" else [dim_in, kernel, kernel, dim_out]
        )
        if self.init_params:
            weight = self.param_init_net.__getattr__(weight_init[0])(
                [],
                blob_out + '_w',
                shape=weight_shape,
                **weight_init[1]
            )
            bias = self.param_init_net.__getattr__(bias_init[0])(
                [],
                blob_out + '_b',
                shape=[dim_out, ],
                **bias_init[1]
            )
        else:
            weight = core.ScopedBlobReference(
                blob_out + '_w', self.param_init_net)
            bias = core.ScopedBlobReference(
                blob_out + '_b', self.param_init_net)
        self.params.extend([weight, bias])
        self.weights.append(weight)
        self.biases.append(bias)
        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit
        return self.net.ConvTranspose(
            [blob_in, weight, bias],
            blob_out,
            kernel=kernel,
            order=self.order,
            **kwargs
        )

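    # Note: unlike Conv, the ConvTranspose weight built above is laid out with
    # the input channels first, i.e. [dim_in, dim_out, kernel, kernel] for
    # NCHW and [dim_in, kernel, kernel, dim_out] for NHWC.
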
    def GroupConv(
        self,
        blob_in,
        blob_out,
        dim_in,
        dim_out,
        kernel,
        weight_init=None,
        bias_init=None,
        group=1,
        **kwargs
    ):
        """Group Convolution.

        This is essentially the same as Conv with a group argument passed in.
        We specialize this for backward interface compatibility.
        """
        return self.Conv(blob_in, blob_out, dim_in, dim_out, kernel,
                         weight_init=weight_init, bias_init=bias_init,
                         group=group, **kwargs)

    def GroupConv_Deprecated(
        self,
        blob_in,
        blob_out,
        dim_in,
        dim_out,
        kernel,
        weight_init=None,
        bias_init=None,
        group=1,
        **kwargs
    ):
        """GroupConvolution's deprecated interface.

        This is used to simulate a group convolution via split and concat. You
        should always use the new group convolution in your new code.
        """
        weight_init = weight_init if weight_init else ('XavierFill', {})
        bias_init = bias_init if bias_init else ('ConstantFill', {})
        use_bias = (
            False if ("no_bias" in kwargs and kwargs["no_bias"]) else True
        )
        if self.use_cudnn:
            kwargs['engine'] = 'CUDNN'
            kwargs['exhaustive_search'] = self.cudnn_exhaustive_search
            if self.ws_nbytes_limit:
                kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit
        if dim_in % group:
            raise ValueError("dim_in should be divisible by group.")
        if dim_out % group:
            raise ValueError("dim_out should be divisible by group.")
        splitted_blobs = self.net.DepthSplit(
            blob_in,
            ['_' + blob_out + '_gconv_split_' + str(i) for i in range(group)],
            dimensions=[int(dim_in / group) for i in range(group)],
            order=self.order
        )
        weight_shape = (
            [dim_out / group, dim_in / group, kernel, kernel]
            if self.order == "NCHW" else
            [dim_out / group, kernel, kernel, dim_in / group]
        )
        # Make sure that the shapes are of int format. Especially for py3
        # where int division gives float output.
        weight_shape = [int(v) for v in weight_shape]
        conv_blobs = []
        for i in range(group):
            if self.init_params:
                weight = self.param_init_net.__getattr__(weight_init[0])(
                    [],
                    blob_out + '_gconv_%d_w' % i,
                    shape=weight_shape,
                    **weight_init[1]
                )
                if use_bias:
                    bias = self.param_init_net.__getattr__(bias_init[0])(
                        [],
                        blob_out + '_gconv_%d_b' % i,
                        shape=[int(dim_out / group)],
                        **bias_init[1]
                    )
            else:
                weight = core.ScopedBlobReference(
                    blob_out + '_gconv_%d_w' % i, self.param_init_net)
                if use_bias:
                    bias = core.ScopedBlobReference(
                        blob_out + '_gconv_%d_b' % i, self.param_init_net)
            if use_bias:
                self.params.extend([weight, bias])
            else:
                self.params.extend([weight])
            self.weights.append(weight)
            if use_bias:
                self.biases.append(bias)
            if use_bias:
                inputs = [weight, bias]
            else:
                inputs = [weight]
            if 'no_bias' in kwargs:
                del kwargs['no_bias']
            conv_blobs.append(
                splitted_blobs[i].Conv(
                    inputs,
                    blob_out + '_gconv_%d' % i,
                    kernel=kernel,
                    order=self.order,
                    **kwargs
                )
            )
        concat, concat_dims = self.net.Concat(
            conv_blobs,
            [blob_out, "_" + blob_out + "_concat_dims"],
            order=self.order
        )
        return concat

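    # Channel bookkeeping for the deprecated path above (hypothetical sizes):
    # with dim_in=256, dim_out=256, group=4, the input is DepthSplit into four
    # 64-channel blobs, each convolved to 64 output channels with a
    # [64, 64, kernel, kernel] weight (NCHW order), and the four results are
    # re-joined along the channel axis by Concat.
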
    def FC(self, *args, **kwargs):
        return model_helpers.FC(self, *args, **kwargs)

    def PackedFC(self, *args, **kwargs):
        return model_helpers.PackedFC(self, *args, **kwargs)

    def FC_Prune(self, *args, **kwargs):
        return model_helpers.FC_Prune(self, *args, **kwargs)

    def FC_Decomp(self, *args, **kwargs):
        return model_helpers.FC_Decomp(self, *args, **kwargs)

    def FC_Sparse(self, *args, **kwargs):
        return model_helpers.FC_Sparse(self, *args, **kwargs)

    def Dropout(self, *args, **kwargs):
        return model_helpers.Dropout(self, *args, **kwargs)

    def LRN(self, *args, **kwargs):
        return model_helpers.LRN(self, *args, **kwargs)

    def Softmax(self, *args, **kwargs):
        return model_helpers.Softmax(self, *args, use_cudnn=self.use_cudnn,
                                     **kwargs)

    def SpatialBN(self, *args, **kwargs):
        return model_helpers.SpatialBN(self, *args, **kwargs)

    def InstanceNorm(self, *args, **kwargs):
        return model_helpers.InstanceNorm(self, *args,
                                          use_cudnn=self.use_cudnn, **kwargs)

    def Relu(self, *args, **kwargs):
        return model_helpers.Relu(self, *args, order=self.order,
                                  use_cudnn=self.use_cudnn, **kwargs)

    def PRelu(self, *args, **kwargs):
        return model_helpers.PRelu(self, *args, **kwargs)

    def Concat(self, *args, **kwargs):
        return model_helpers.Concat(self, *args, **kwargs)

    def DepthConcat(self, *args, **kwargs):
        return model_helpers.DepthConcat(self, *args, **kwargs)

    def Sum(self, *args, **kwargs):
        return model_helpers.Sum(self, *args, **kwargs)

    def Transpose(self, *args, **kwargs):
        return model_helpers.Transpose(self, *args, order=self.order,
                                       use_cudnn=self.use_cudnn,
                                       **kwargs)

    def Iter(self, *args, **kwargs):
        return model_helpers.Iter(self, *args, **kwargs)

    def Accuracy(self, *args, **kwargs):
        return model_helpers.Accuracy(self, *args, **kwargs)

    def MaxPool(self, *args, **kwargs):
        return model_helpers.MaxPool(self, *args, use_cudnn=self.use_cudnn,
                                     **kwargs)

    def AveragePool(self, *args, **kwargs):
        return model_helpers.AveragePool(self, *args,
                                         use_cudnn=self.use_cudnn, **kwargs)

    def PadImage(self, blob_in, blob_out, **kwargs):
        return self.net.PadImage(blob_in, blob_out, **kwargs)

    @property
    def XavierInit(self):
        return ('XavierFill', {})

    def ConstantInit(self, value):
        return ('ConstantFill', dict(value=value))

    @property
    def MSRAInit(self):
        return ('MSRAFill', {})

    @property
    def ZeroInit(self):
        return ('ConstantFill', {})

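    # These helpers return (initializer_op_name, kwargs) tuples in the format
    # expected by the weight_init / bias_init arguments above, e.g.
    # (hypothetical blob names):
    #
    #     model.Conv('data', 'conv1', dim_in=3, dim_out=16, kernel=3,
    #                weight_init=model.MSRAInit,
    #                bias_init=model.ConstantInit(0.0))
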
    def AddWeightDecay(self, weight_decay):
        """Adds a decay to weights in the model.

        This is a form of L2 regularization.

        Args:
            weight_decay: strength of the regularization
        """
        if weight_decay <= 0.0:
            return
        wd = self.param_init_net.ConstantFill([], 'wd', shape=[1],
                                              value=weight_decay)
        ONE = self.param_init_net.ConstantFill([], "ONE", shape=[1],
                                               value=1.0)
        for param in self.GetWeights():
            # Equivalent to: grad += wd * param
            grad = self.param_to_grad[param]
            self.net.WeightedSum(
                [grad, ONE, param, wd],
                grad,
            )

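    # The WeightedSum above computes grad = 1.0 * grad + weight_decay * param
    # in place, i.e. it adds the gradient of the L2 penalty
    # 0.5 * weight_decay * ||param||^2 to each weight's gradient.
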
    @property
    def CPU(self):
        device_option = caffe2_pb2.DeviceOption()
        device_option.device_type = caffe2_pb2.CPU
        return device_option

    @property
    def GPU(self, gpu_id=0):
        # Note: since this is a property it is always accessed without
        # arguments, so gpu_id always takes its default value of 0.
        device_option = caffe2_pb2.DeviceOption()
        device_option.device_type = caffe2_pb2.CUDA
        device_option.cuda_gpu_id = gpu_id
        return device_option
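
# Illustrative device-placement sketch (not part of the original module): the
# CPU / GPU properties return caffe2_pb2.DeviceOption protos that can be used
# with core.DeviceScope so that subsequently added operators run on that
# device, e.g.
#
#     model = CNNModelHelper(order="NCHW", name="example")
#     with core.DeviceScope(model.GPU):
#         model.Conv('data', 'conv1', dim_in=3, dim_out=16, kernel=3)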