pytorch/caffe2/python/caffe_translator.py
Viswanath Sivakumar e67425647a Support bias for Scale layer in caffe_translate
Summary:
Turns out xray models have some independent Scale layers (with bias) besides
the Conv-Scale pairs. We could still fuse these with the previous layers with
some work, but for simplicity this emits a Mul op followed by an Add op for
the bias when needed. We could revisit layer-fusion optimizations in the
future once we have something working for xray.

Reviewed By: Yangqing

Differential Revision: D4427266

fbshipit-source-id: ef7d8677ccd7d10dbd20759eeed378d9bc4522d1
2017-01-18 09:59:21 -08:00


#!/usr/bin/env python2
import copy
import logging
import numpy as np
from caffe2.proto import caffe2_pb2, caffe2_legacy_pb2
from caffe.proto import caffe_pb2
from caffe2.python import core, utils
logging.basicConfig()
log = logging.getLogger("caffe_translator")
log.setLevel(logging.INFO)
def _StateMeetsRule(state, rule):
"""A function that reproduces Caffe's StateMeetsRule functionality."""
if rule.HasField('phase') and rule.phase != state.phase:
return False
if rule.HasField('min_level') and state.level < rule.min_level:
return False
    if rule.HasField('max_level') and state.level > rule.max_level:
return False
curr_stages = set(list(state.stage))
    # all stages in rule.stage should be present, otherwise it's not a match.
if len(rule.stage) and any([s not in curr_stages for s in rule.stage]):
return False
    # none of the stages in rule.not_stage should be present, otherwise it's
    # not a match.
if len(rule.not_stage) and any([s in curr_stages for s in rule.not_stage]):
return False
# If none of the nonmatch happens, return True.
return True
def _ShouldInclude(net_state, layer):
"""A function that reproduces Caffe's inclusion and exclusion rule."""
ret = (len(layer.include) == 0)
# check exclude rules: if any exclusion is met, we shouldn't include.
ret &= not any([_StateMeetsRule(net_state, rule) for rule in layer.exclude])
if len(layer.include):
# check include rules: if any inclusion is met, we should include.
ret |= any([_StateMeetsRule(net_state, rule) for rule in layer.include])
return ret
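# As a quick illustration of the two helpers above (a sketch; field values
# follow caffe.proto), a rule that requires the TEST phase matches only a
# TEST-phase net state:
#
#   state = caffe_pb2.NetState()
#   state.phase = caffe_pb2.TEST
#   rule = caffe_pb2.NetStateRule()
#   rule.phase = caffe_pb2.TEST
#   assert _StateMeetsRule(state, rule)
#   state.phase = caffe_pb2.TRAIN
#   assert not _StateMeetsRule(state, rule)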
class TranslatorRegistry(object):
registry_ = {}
@classmethod
def Register(cls, op_name):
"""A decorator for registering gradient mappings."""
def Wrapper(func):
cls.registry_[op_name] = func
return func
return Wrapper
@classmethod
def TranslateLayer(cls, layer, pretrained_blobs, is_test):
try:
caffe_ops, params = cls.registry_[layer.type](
layer, pretrained_blobs, is_test)
except KeyError:
raise KeyError('No translator registered for layer: %s yet.' %
str(layer))
if caffe_ops is None:
caffe_ops = []
if type(caffe_ops) is not list:
caffe_ops = [caffe_ops]
return caffe_ops, params
@classmethod
def TranslateModel(
cls,
caffe_net,
pretrained_net,
is_test=False,
net_state=None,
):
net_state = caffe_pb2.NetState() if net_state is None else net_state
net = caffe2_pb2.NetDef()
net.name = caffe_net.name
net_params = caffe2_pb2.TensorProtos()
if len(caffe_net.layer) == 0:
raise ValueError(
'I think something is wrong. This translation script '
'only accepts new style layers that are stored in the '
'layer field.'
)
for layer in caffe_net.layer:
if not _ShouldInclude(net_state, layer):
log.info('Current net state does not need layer {}'
.format(layer.name))
continue
log.info('Translate layer {}'.format(layer.name))
# Get pretrained one
            pretrained_layers = (
                [l for l in pretrained_net.layer if l.name == layer.name] +
                [l for l in pretrained_net.layers if l.name == layer.name]
            )
if len(pretrained_layers) > 1:
raise ValueError(
'huh? more than one pretrained layer of one name?')
elif len(pretrained_layers) == 1:
pretrained_blobs = [
utils.CaffeBlobToNumpyArray(blob)
for blob in pretrained_layers[0].blobs
]
else:
# No pretrained layer for the given layer name. We'll just pass
# no parameter blobs.
# print 'No pretrained layer for layer', layer.name
pretrained_blobs = []
operators, params = cls.TranslateLayer(
layer, pretrained_blobs, is_test)
net.op.extend(operators)
net_params.protos.extend(params)
return net, net_params
def TranslateModel(*args, **kwargs):
return TranslatorRegistry.TranslateModel(*args, **kwargs)
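# Registering a translator for a new layer type is a one-liner with the
# decorator above. A minimal sketch for a hypothetical "MyLayer" type (the
# layer name and the Caffe2 op it maps to are made up for illustration):
#
#   @TranslatorRegistry.Register("MyLayer")
#   def TranslateMyLayer(layer, pretrained_blobs, is_test):
#       caffe_op = BaseTranslate(layer, "MyCaffe2Op")
#       return caffe_op, []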
def ConvertTensorProtosToInitNet(net_params):
"""Takes the net_params returned from TranslateModel, and wrap it as an
init net that contain GivenTensorFill.
This is a very simple feature that only works with float tensors, and is
only intended to be used in an environment where you want a single
initialization file - for more complex cases, use a db to store the
parameters.
"""
init_net = caffe2_pb2.NetDef()
for tensor in net_params.protos:
if len(tensor.float_data) == 0:
raise RuntimeError(
"Only float tensors are supported in this util.")
op = core.CreateOperator(
"GivenTensorFill", [], [tensor.name],
arg=[
utils.MakeArgument("shape", list(tensor.dims)),
utils.MakeArgument("values", tensor.float_data)])
init_net.op.extend([op])
return init_net
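# Typical end-to-end usage, sketched with placeholder file names (requires
# google.protobuf.text_format for parsing the prototxt):
#
#   caffe_net = caffe_pb2.NetParameter()
#   text_format.Merge(open('deploy.prototxt').read(), caffe_net)
#   pretrained = caffe_pb2.NetParameter()
#   pretrained.ParseFromString(open('weights.caffemodel', 'rb').read())
#   net, net_params = TranslateModel(caffe_net, pretrained, is_test=True)
#   init_net = ConvertTensorProtosToInitNet(net_params)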
def BaseTranslate(layer, caffe2_type):
"""A simple translate interface that maps the layer input and output."""
caffe2_op = caffe2_pb2.OperatorDef()
caffe2_op.type = caffe2_type
caffe2_op.input.extend(layer.bottom)
caffe2_op.output.extend(layer.top)
return caffe2_op
def AddArgument(op, key, value):
"""Makes an argument based on the value type."""
op.arg.extend([utils.MakeArgument(key, value)])
################################################################################
# Common translators for layers.
################################################################################
@TranslatorRegistry.Register("Input")
def TranslateInput(layer, pretrained_blobs, is_test):
return [], []
@TranslatorRegistry.Register("Data")
def TranslateData(layer, pretrained_blobs, is_test):
return [], []
# A function used in convolution, pooling and deconvolution to deal with
# conv pool specific parameters.
def _TranslateStridePadKernelHelper(param, caffe_op):
try:
if (len(param.stride) > 1 or len(param.kernel_size) > 1 or
len(param.pad) > 1):
raise NotImplementedError(
"Translator currently does not support non-conventional "
"pad/kernel/stride settings."
)
stride = param.stride[0] if len(param.stride) else 1
pad = param.pad[0] if len(param.pad) else 0
kernel = param.kernel_size[0] if len(param.kernel_size) else 0
except TypeError:
        # This catches the case of a PoolingParameter, in which pad, stride
        # and kernel are scalar (non-repeated) fields.
stride = param.stride
pad = param.pad
kernel = param.kernel_size
# Get stride
if param.HasField("stride_h") or param.HasField("stride_w"):
AddArgument(caffe_op, "stride_h", param.stride_h)
AddArgument(caffe_op, "stride_w", param.stride_w)
else:
AddArgument(caffe_op, "stride", stride)
# Get pad
if param.HasField("pad_h") or param.HasField("pad_w"):
if param.pad_h == param.pad_w:
AddArgument(caffe_op, "pad", param.pad_h)
else:
AddArgument(caffe_op, "pad_t", param.pad_h)
AddArgument(caffe_op, "pad_b", param.pad_h)
AddArgument(caffe_op, "pad_l", param.pad_w)
AddArgument(caffe_op, "pad_r", param.pad_w)
else:
AddArgument(caffe_op, "pad", pad)
# Get kernel
if param.HasField("kernel_h") or param.HasField("kernel_w"):
AddArgument(caffe_op, "kernel_h", param.kernel_h)
AddArgument(caffe_op, "kernel_w", param.kernel_w)
else:
AddArgument(caffe_op, "kernel", kernel)
@TranslatorRegistry.Register("Convolution")
def TranslateConv(layer, pretrained_blobs, is_test):
param = layer.convolution_param
caffe_op = BaseTranslate(layer, "Conv")
output = caffe_op.output[0]
caffe_op.input.extend([output + '_w', output + '_b'])
_TranslateStridePadKernelHelper(param, caffe_op)
# weight
params = [
utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')]
# bias
if len(pretrained_blobs) == 2:
params.append(
utils.NumpyArrayToCaffe2Tensor(
pretrained_blobs[1].flatten(), output + '_b'))
# Group convolution option
if param.group != 1:
AddArgument(caffe_op, "group", param.group)
    # Get dilation - not tested. If you have a model and this checks out,
    # please contribute a test case.
if len(param.dilation) > 0:
if len(param.dilation) == 1:
AddArgument(caffe_op, "dilation", param.dilation[0])
elif len(param.dilation) == 2:
AddArgument(caffe_op, "dilation_h", param.dilation[0])
AddArgument(caffe_op, "dilation_w", param.dilation[1])
return caffe_op, params
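# For reference, a Convolution layer with bottom "data" and top "conv1"
# (names illustrative) translates to a single op that reads its parameters
# from external blobs filled by the init net:
#
#   Conv(data, conv1_w, conv1_b) -> conv1   # plus stride/pad/kernel args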
@TranslatorRegistry.Register("Deconvolution")
def TranslateDeconv(layer, pretrained_blobs, is_test):
param = layer.convolution_param
if param.group > 1:
raise NotImplementedError(
"Translator currently does not support group deconvolution."
)
caffe_op = BaseTranslate(layer, "ConvTranspose")
output = caffe_op.output[0]
_TranslateStridePadKernelHelper(param, caffe_op)
caffe_op.input.extend([output + '_w', output + '_b'])
AddArgument(caffe_op, "order", "NCHW")
weight = utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')
bias = utils.NumpyArrayToCaffe2Tensor(
pretrained_blobs[1].flatten(), output + '_b'
)
return caffe_op, [weight, bias]
@TranslatorRegistry.Register("ReLU")
def TranslateRelu(layer, pretrained_blobs, is_test):
return BaseTranslate(layer, "Relu"), []
@TranslatorRegistry.Register("Pooling")
def TranslatePool(layer, pretrained_blobs, is_test):
param = layer.pooling_param
    if param.pool == caffe_pb2.PoolingParameter.MAX:
        caffe_op = BaseTranslate(layer, "MaxPool")
    elif param.pool == caffe_pb2.PoolingParameter.AVE:
        caffe_op = BaseTranslate(layer, "AveragePool")
    else:
        raise NotImplementedError(
            "Translator currently does not support this pooling method.")
_TranslateStridePadKernelHelper(param, caffe_op)
AddArgument(caffe_op, "order", "NCHW")
if param.HasField('torch_pooling') and param.torch_pooling:
# In the Facebook port of Caffe, a torch_pooling field was added to
# map the pooling computation of Torch. Essentially, it uses
# floor((height + 2 * padding - kernel) / stride) + 1
# instead of
# ceil((height + 2 * padding - kernel) / stride) + 1
# which is Caffe's version.
# Torch pooling is actually the same as Caffe2 pooling, so we don't
# need to do anything.
pass
else:
AddArgument(caffe_op, "legacy_pad",
caffe2_legacy_pb2.CAFFE_LEGACY_POOLING)
if param.global_pooling:
AddArgument(caffe_op, "global_pooling", 1)
return caffe_op, []
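# A worked example of the floor/ceil difference described above, with
# height=6, kernel=3, stride=2, pad=0:
#   floor((6 + 2 * 0 - 3) / 2) + 1 = 2   (Torch, and Caffe2's native pooling)
#   ceil((6 + 2 * 0 - 3) / 2) + 1 = 3    (stock Caffe; the legacy_pad argument
#                                         reproduces this behavior in Caffe2)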
@TranslatorRegistry.Register("LRN")
def TranslateLRN(layer, pretrained_blobs, is_test):
caffe_op = BaseTranslate(layer, "LRN")
caffe_op.output.extend(['_' + caffe_op.output[0] + '_scale'])
param = layer.lrn_param
if param.norm_region != caffe_pb2.LRNParameter.ACROSS_CHANNELS:
raise ValueError(
"Does not support norm region other than across channels.")
AddArgument(caffe_op, "size", int(param.local_size))
AddArgument(caffe_op, "alpha", float(param.alpha))
AddArgument(caffe_op, "beta", float(param.beta))
AddArgument(caffe_op, "bias", float(param.k))
AddArgument(caffe_op, "order", "NCHW")
return caffe_op, []
@TranslatorRegistry.Register("InnerProduct")
def TranslateInnerProduct(layer, pretrained_blobs, is_test):
caffe_op = BaseTranslate(layer, "FC")
output = caffe_op.output[0]
caffe_op.input.extend([output + '_w', output + '_b'])
weight = utils.NumpyArrayToCaffe2Tensor(
pretrained_blobs[0][0, 0], output + '_w'
)
bias = utils.NumpyArrayToCaffe2Tensor(
pretrained_blobs[1].flatten(), output + '_b'
)
return caffe_op, [weight, bias]
@TranslatorRegistry.Register("Dropout")
def TranslateDropout(layer, pretrained_blobs, is_test):
caffe_op = BaseTranslate(layer, "Dropout")
caffe_op.output.extend(['_' + caffe_op.output[0] + '_mask'])
param = layer.dropout_param
AddArgument(caffe_op, "ratio", param.dropout_ratio)
    if is_test:
AddArgument(caffe_op, "is_test", 1)
return caffe_op, []
@TranslatorRegistry.Register("Softmax")
def TranslateSoftmax(layer, pretrained_blobs, is_test):
caffe_op = BaseTranslate(layer, "Softmax")
return caffe_op, []
@TranslatorRegistry.Register("SoftmaxWithLoss")
def TranslateSoftmaxWithLoss(layer, pretrained_blobs, is_test):
softmax_op = core.CreateOperator(
"Softmax", [layer.bottom[0]],
layer.bottom[0] + "_translator_autogen_softmax")
xent_op = core.CreateOperator(
"LabelCrossEntropy",
[softmax_op.output[0], layer.bottom[1]],
layer.bottom[0] + "_translator_autogen_xent")
loss_op = core.CreateOperator(
"AveragedLoss",
xent_op.output[0],
layer.top[0])
return [softmax_op, xent_op, loss_op], []
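# SoftmaxWithLoss therefore expands into a three-op chain; for bottoms
# ["pred", "label"] and top "loss" (names illustrative):
#
#   Softmax(pred) -> pred_translator_autogen_softmax
#   LabelCrossEntropy(pred_translator_autogen_softmax, label)
#       -> pred_translator_autogen_xent
#   AveragedLoss(pred_translator_autogen_xent) -> loss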
@TranslatorRegistry.Register("Accuracy")
def TranslateAccuracy(layer, pretrained_blobs, is_test):
caffe_op = BaseTranslate(layer, "Accuracy")
if layer.accuracy_param.top_k != 1:
log.warn("Translation does not support Accuracy layers top_k >1.")
return caffe_op, []
@TranslatorRegistry.Register("Concat")
def TranslateConcat(layer, pretrained_blobs, is_test):
caffe_op = BaseTranslate(layer, "Concat")
caffe_op.output.extend(['_' + caffe_op.output[0] + '_dims'])
AddArgument(caffe_op, "order", "NCHW")
return caffe_op, []
@TranslatorRegistry.Register("TanH")
def TranslateTanH(layer, pretrained_blobs, is_test):
caffe_op = BaseTranslate(layer, "Tanh")
return caffe_op, []
@TranslatorRegistry.Register("InstanceNorm")
def TranslateInstanceNorm(layer, pretrained_blobs, is_test):
caffe_op = BaseTranslate(layer, "InstanceNorm")
output = caffe_op.output[0]
weight = utils.NumpyArrayToCaffe2Tensor(
pretrained_blobs[0].flatten(), output + '_w')
bias = utils.NumpyArrayToCaffe2Tensor(
pretrained_blobs[1].flatten(), output + '_b')
caffe_op.input.extend([output + '_w', output + '_b'])
AddArgument(caffe_op, "order", "NCHW")
return caffe_op, [weight, bias]
@TranslatorRegistry.Register("Eltwise")
def TranslateElementWise(layer, pretrained_blobs, is_test):
param = layer.eltwise_param
# TODO(jiayq): if we have a protobuf that uses this, lift this constraint
# and verify that we can correctly translate.
    if len(param.coeff) or param.operation != caffe_pb2.EltwiseParameter.SUM:
raise RuntimeError("This eltwise layer is not yet supported.")
caffe_op = BaseTranslate(layer, "Sum")
return caffe_op, []
@TranslatorRegistry.Register("Scale")
def TranslateScale(layer, pretrained_blobs, is_test):
mul_op = BaseTranslate(layer, "Mul")
scale_param = layer.scale_param
AddArgument(mul_op, "axis", scale_param.axis)
AddArgument(mul_op, "broadcast", True)
if len(mul_op.input) == 1:
# the scale parameter is in pretrained blobs
if scale_param.num_axes != 1:
raise RuntimeError("This path has not been verified yet.")
output = mul_op.output[0]
mul_op_param = output + '_w'
mul_op.input.append(mul_op_param)
weights = []
weights.append(utils.NumpyArrayToCaffe2Tensor(
pretrained_blobs[0].flatten(), mul_op_param))
add_op = None
if len(pretrained_blobs) == 1:
# No bias-term in Scale layer
pass
elif len(pretrained_blobs) == 2:
            # Caffe's Scale layer supports a bias term such that it computes
            # (scale_param * X + bias), whereas Caffe2's Mul op doesn't.
            # Emit the Mul op for the scale, followed by a separate Add op
            # for the bias.
add_op = copy.deepcopy(mul_op)
add_op.type = "Add"
add_op_param = output + '_b'
internal_blob = output + "_internal"
del mul_op.output[:]
mul_op.output.append(internal_blob)
del add_op.input[:]
add_op.input.append(internal_blob)
add_op.input.append(add_op_param)
weights.append(utils.NumpyArrayToCaffe2Tensor(
pretrained_blobs[1].flatten(), add_op_param))
else:
raise RuntimeError("Unexpected number of pretrained blobs in Scale")
caffe_ops = [mul_op]
if add_op:
caffe_ops.append(add_op)
assert len(caffe_ops) == len(weights)
return caffe_ops, weights
elif len(mul_op.input) == 2:
# TODO(jiayq): find a protobuf that uses this and verify.
raise RuntimeError("This path has not been verified yet.")
else:
raise RuntimeError("Unexpected number of inputs.")
@TranslatorRegistry.Register("Reshape")
def TranslateReshape(layer, pretrained_blobs, is_test):
caffe_op = BaseTranslate(layer, "Reshape")
caffe_op.output.append("_" + caffe_op.input[0] + "_dims")
reshape_param = layer.reshape_param
AddArgument(caffe_op, 'shape', reshape_param.shape.dim)
return caffe_op, []