Fix: add GDN to __init__. Also put it in alphabetical order.
PiperOrigin-RevId: 163842410
parent db0e1c6c8e
commit 1a44996072
@@ -32,6 +32,8 @@ See the @{$python/contrib.layers} guide.
@@embedding_lookup_unique
@@flatten
@@fully_connected
@@GDN
@@gdn
@@layer_norm
@@linear
@@max_pool2d
@@ -71,6 +71,8 @@ __all__ = ['avg_pool2d',
           'elu',
           'flatten',
           'fully_connected',
           'GDN',
           'gdn',
           'layer_norm',
           'linear',
           'pool',
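With `GDN` and `gdn` added to the symbol list and to `__all__` above, both names become importable from the contrib namespace. A minimal smoke test of the new exports (illustrative only, assuming a TensorFlow 1.x build that contains this commit):

```python
# Hypothetical check of the exports added in the two hunks above.
from tensorflow.contrib import layers

assert hasattr(layers, 'GDN') and hasattr(layers, 'gdn')
print(layers.gdn.__doc__.splitlines()[0])  # "Functional interface for GDN layer."
```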
@@ -1682,6 +1684,316 @@ def fully_connected(inputs,
        outputs_collections, sc.original_name_scope, outputs)


class GDN(base.Layer):
  """Generalized divisive normalization layer.

  Based on the papers:

    "Density Modeling of Images using a Generalized Normalization
    Transformation"
    Johannes Ballé, Valero Laparra, Eero P. Simoncelli
    https://arxiv.org/abs/1511.06281

    "End-to-end Optimized Image Compression"
    Johannes Ballé, Valero Laparra, Eero P. Simoncelli
    https://arxiv.org/abs/1611.01704

  Implements an activation function that is essentially a multivariate
  generalization of a particular sigmoid-type function:

  ```
  y[i] = x[i] / sqrt(beta[i] + sum_j(gamma[j, i] * x[j]^2))
  ```

  where `i` and `j` run over channels. This implementation never sums across
  spatial dimensions. It is similar to local response normalization, but much
  more flexible, as `beta` and `gamma` are trainable parameters.

  Arguments:
    inverse: If `False` (default), compute GDN response. If `True`, compute
      IGDN response (one step of fixed point iteration to invert GDN; the
      division is replaced by multiplication).
    beta_min: Lower bound for beta, to prevent numerical error from causing
      square root of zero or negative values.
    gamma_init: The gamma matrix will be initialized as the identity matrix
      multiplied with this value. If set to zero, the layer is effectively
      initialized to the identity operation, since beta is initialized as one.
      A good default setting is somewhere between 0 and 0.5.
    reparam_offset: Offset added to the reparameterization of beta and gamma.
      The reparameterization of beta and gamma as their square roots lets the
      training slow down when their values are close to zero, which is
      desirable as small values in the denominator can lead to a situation
      where gradient noise on beta/gamma leads to extreme amounts of noise in
      the GDN activations. However, without the offset, we would get zero
      gradients if any elements of beta or gamma were exactly zero, and thus
      the training could get stuck. To prevent this, we add this small
      constant. The default value was empirically determined as a good
      starting point. Making it bigger potentially leads to more gradient
      noise on the activations, while making it too small may lead to
      numerical precision issues.
    data_format: Format of input tensor. Currently supports `'channels_first'`
      and `'channels_last'`.
    activity_regularizer: Regularizer function for the output.
    trainable: Boolean, if `True`, also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: String, the name of the layer. Layers with the same name will
      share weights, but to avoid mistakes we require `reuse=True` in such
      cases.

  Properties:
    inverse: Boolean, whether IGDN is computed (`True`) or GDN (`False`).
    data_format: Format of input tensor. Currently supports `'channels_first'`
      and `'channels_last'`.
    beta: The beta parameter as defined above (1D `Tensor`).
    gamma: The gamma parameter as defined above (2D `Tensor`).
  """
  def __init__(self,
               inverse=False,
               beta_min=1e-6,
               gamma_init=.1,
               reparam_offset=2 ** -18,
               data_format='channels_last',
               activity_regularizer=None,
               trainable=True,
               name=None,
               **kwargs):
    super(GDN, self).__init__(trainable=trainable, name=name, **kwargs)
    self.inverse = inverse
    self._beta_min = beta_min
    self._gamma_init = gamma_init
    self._reparam_offset = reparam_offset
    self.data_format = data_format
    self.activity_regularizer = activity_regularizer
    self._channel_axis()  # trigger ValueError early
    self.input_spec = base.InputSpec(min_ndim=3, max_ndim=5)

  def _channel_axis(self):
    try:
      return {'channels_first': 1, 'channels_last': -1}[self.data_format]
    except KeyError:
      raise ValueError('Unsupported `data_format` for GDN layer: {}.'.format(
          self.data_format))

  @staticmethod
  def _lower_bound(inputs, bound, name=None):
    """Same as tf.maximum, but with helpful gradient for inputs < bound.

    The gradient is overwritten so that it is passed through if the input is
    not hitting the bound. If it is, only gradients that push `inputs` higher
    than the bound are passed through. No gradients are passed through to the
    bound.

    Args:
      inputs: input tensor
      bound: lower bound for the input tensor
      name: name for this op

    Returns:
      tf.maximum(inputs, bound)
    """
    with ops.name_scope(name, 'GDNLowerBound', [inputs, bound]) as scope:
      inputs = ops.convert_to_tensor(inputs, name='inputs')
      bound = ops.convert_to_tensor(bound, name='bound')
      with ops.get_default_graph().gradient_override_map(
          {'Maximum': 'GDNLowerBound'}):
        return math_ops.maximum(inputs, bound, name=scope)

  @staticmethod
  def _lower_bound_grad(op, grad):
    """Gradient for `_lower_bound`.

    Args:
      op: the tensorflow op for which to calculate a gradient
      grad: gradient with respect to the output of the op

    Returns:
      gradients with respect to the inputs of the op
    """
    inputs = op.inputs[0]
    bound = op.inputs[1]
    pass_through_if = math_ops.logical_or(inputs >= bound, grad < 0)
    return [math_ops.cast(pass_through_if, grad.dtype) * grad, None]
  def build(self, input_shape):
    channel_axis = self._channel_axis()
    input_shape = tensor_shape.TensorShape(input_shape)
    num_channels = input_shape[channel_axis].value
    if num_channels is None:
      raise ValueError('The channel dimension of the inputs to `GDN` '
                       'must be defined.')
    self._input_rank = input_shape.ndims
    self.input_spec = base.InputSpec(ndim=input_shape.ndims,
                                     axes={channel_axis: num_channels})

    pedestal = array_ops.constant(self._reparam_offset ** 2, dtype=self.dtype)
    beta_bound = array_ops.constant(
        (self._beta_min + self._reparam_offset ** 2) ** .5, dtype=self.dtype)
    gamma_bound = array_ops.constant(self._reparam_offset, dtype=self.dtype)

    def beta_initializer(shape, dtype=None, partition_info=None):
      del partition_info  # unused
      return math_ops.sqrt(array_ops.ones(shape, dtype=dtype) + pedestal)

    def gamma_initializer(shape, dtype=None, partition_info=None):
      del partition_info  # unused
      assert len(shape) == 2
      assert shape[0] == shape[1]
      eye = linalg_ops.eye(shape[0], dtype=dtype)
      return math_ops.sqrt(self._gamma_init * eye + pedestal)

    beta = self.add_variable('reparam_beta',
                             shape=[num_channels],
                             initializer=beta_initializer,
                             dtype=self.dtype,
                             trainable=True)
    beta = self._lower_bound(beta, beta_bound)
    self.beta = math_ops.square(beta) - pedestal

    gamma = self.add_variable('reparam_gamma',
                              shape=[num_channels, num_channels],
                              initializer=gamma_initializer,
                              dtype=self.dtype,
                              trainable=True)
    gamma = self._lower_bound(gamma, gamma_bound)
    self.gamma = math_ops.square(gamma) - pedestal

    self.built = True

  def call(self, inputs):
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    ndim = self._input_rank

    shape = self.gamma.get_shape().as_list()
    gamma = array_ops.reshape(self.gamma, (ndim - 2) * [1] + shape)

    # Compute normalization pool.
    if self.data_format == 'channels_first':
      norm_pool = nn.convolution(math_ops.square(inputs), gamma, 'VALID',
                                 data_format='NC' + 'DHW'[-(ndim - 2):])
      if ndim == 3:
        norm_pool = array_ops.expand_dims(norm_pool, 2)
        norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NCHW')
        norm_pool = array_ops.squeeze(norm_pool, [2])
      elif ndim == 5:
        shape = array_ops.shape(norm_pool)
        norm_pool = array_ops.reshape(norm_pool, shape[:3] + [-1])
        norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NCHW')
        norm_pool = array_ops.reshape(norm_pool, shape)
      else:  # ndim == 4
        norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NCHW')
    else:  # channels_last
      norm_pool = nn.convolution(math_ops.square(inputs), gamma, 'VALID')
      norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NHWC')
    norm_pool = math_ops.sqrt(norm_pool)

    if self.inverse:
      outputs = inputs * norm_pool
    else:
      outputs = inputs / norm_pool
    outputs.set_shape(inputs.get_shape())
    return outputs
  def _compute_output_shape(self, input_shape):
    channel_axis = self._channel_axis()
    input_shape = tensor_shape.TensorShape(input_shape)
    if not 3 <= input_shape.ndims <= 5:
      raise ValueError('`input_shape` must be of rank 3 to 5, inclusive.')
    if input_shape[channel_axis].value is None:
      raise ValueError(
          'The channel dimension of `input_shape` must be defined.')
    return input_shape


ops.RegisterGradient('GDNLowerBound')(GDN._lower_bound_grad)  # pylint:disable=protected-access


def gdn(inputs,
        inverse=False,
        beta_min=1e-6,
        gamma_init=.1,
        reparam_offset=2 ** -18,
        data_format='channels_last',
        activity_regularizer=None,
        trainable=True,
        name=None,
        reuse=None):
  """Functional interface for GDN layer.

  Based on the papers:

    "Density Modeling of Images using a Generalized Normalization
    Transformation"
    Johannes Ballé, Valero Laparra, Eero P. Simoncelli
    https://arxiv.org/abs/1511.06281

    "End-to-end Optimized Image Compression"
    Johannes Ballé, Valero Laparra, Eero P. Simoncelli
    https://arxiv.org/abs/1611.01704

  Implements an activation function that is essentially a multivariate
  generalization of a particular sigmoid-type function:

  ```
  y[i] = x[i] / sqrt(beta[i] + sum_j(gamma[j, i] * x[j]^2))
  ```

  where `i` and `j` run over channels. This implementation never sums across
  spatial dimensions. It is similar to local response normalization, but much
  more flexible, as `beta` and `gamma` are trainable parameters.

  Arguments:
    inputs: Tensor input.
    inverse: If `False` (default), compute GDN response. If `True`, compute
      IGDN response (one step of fixed point iteration to invert GDN; the
      division is replaced by multiplication).
    beta_min: Lower bound for beta, to prevent numerical error from causing
      square root of zero or negative values.
    gamma_init: The gamma matrix will be initialized as the identity matrix
      multiplied with this value. If set to zero, the layer is effectively
      initialized to the identity operation, since beta is initialized as one.
      A good default setting is somewhere between 0 and 0.5.
    reparam_offset: Offset added to the reparameterization of beta and gamma.
      The reparameterization of beta and gamma as their square roots lets the
      training slow down when their values are close to zero, which is
      desirable as small values in the denominator can lead to a situation
      where gradient noise on beta/gamma leads to extreme amounts of noise in
      the GDN activations. However, without the offset, we would get zero
      gradients if any elements of beta or gamma were exactly zero, and thus
      the training could get stuck. To prevent this, we add this small
      constant. The default value was empirically determined as a good
      starting point. Making it bigger potentially leads to more gradient
      noise on the activations, while making it too small may lead to
      numerical precision issues.
    data_format: Format of input tensor. Currently supports `'channels_first'`
      and `'channels_last'`.
    activity_regularizer: Regularizer function for the output.
    trainable: Boolean, if `True`, also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: String, the name of the layer. Layers with the same name will
      share weights, but to avoid mistakes we require `reuse=True` in such
      cases.
    reuse: Boolean, whether to reuse the weights of a previous layer by the
      same name.

  Returns:
    Output tensor.
  """
  layer = GDN(inverse=inverse,
              beta_min=beta_min,
              gamma_init=gamma_init,
              reparam_offset=reparam_offset,
              data_format=data_format,
              activity_regularizer=activity_regularizer,
              trainable=trainable,
              name=name,
              dtype=inputs.dtype.base_dtype,
              _scope=name,
              _reuse=reuse)
  return layer.apply(inputs)


@add_arg_scope
def layer_norm(inputs,
               center=True,
@@ -1812,300 +2124,6 @@ def layer_norm(inputs,
        outputs)


class GDN(base.Layer):
  """Generalized divisive normalization layer.

  Based on the papers:

    "Density Modeling of Images using a Generalized Normalization
    Transformation"
    Johannes Ballé, Valero Laparra, Eero P. Simoncelli
    https://arxiv.org/abs/1511.06281

    "End-to-end Optimized Image Compression"
    Johannes Ballé, Valero Laparra, Eero P. Simoncelli
    https://arxiv.org/abs/1611.01704

  Implements an activation function that is essentially a multivariate
  generalization of a particular sigmoid-type function:

  y[i] = x[i] / sqrt(beta[i] + sum_j(gamma[j, i] * x[j]))

  where i and j run over channels. This implementation never sums across
  spatial dimensions. It is similar to local response normalization, but more
  powerful, as beta and gamma are trainable parameters.

  Arguments:
    inverse: If False (default), compute GDN response. If True, compute IGDN
      response (one step of fixed point iteration to invert GDN; the division
      is replaced by multiplication).
    beta_min: Lower bound for beta, to prevent numerical error from causing
      square root of zero or negative values.
    gamma_init: The gamma matrix will be initialized as the identity matrix
      multiplied with this value. If set to zero, the layer is effectively
      initialized to the identity operation, since beta is initialized as one.
      A good default setting is somewhere between 0 and 0.5.
    reparam_offset: Offset added to the reparameterization of beta and gamma.
      The reparameterization of beta and gamma as their square roots lets the
      training slow down when their values are close to zero, which is
      desirable as small values in the denominator can lead to a situation
      where gradient noise on beta/gamma leads to extreme amounts of noise in
      the GDN activations. However, without the offset, we would get zero
      gradients if any elements of beta or gamma were exactly zero, and thus
      the training could get stuck. To prevent this, we add this small
      constant. The default value was empirically determined as a good
      starting point. Making it bigger potentially leads to more gradient
      noise on the activations, making it too small may lead to numerical
      precision issues.
    data_format: Format of input tensor. Currently supports 'channels_first'
      and 'channels_last'.
    trainable: Boolean, if `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: String, the name of the layer. Layers with the same name will
      share weights, but to avoid mistakes we require reuse=True in such
      cases.
    reuse: Boolean, whether to reuse the weights of a previous layer
      by the same name.

  Properties:
    inverse: Boolean, whether GDN is computed (True) or IGDN (False).
    data_format: Format of input tensor. Currently supports 'channels_first'
      and 'channels_last'.
    beta: The beta parameter as defined above (1D TensorFlow tensor).
    gamma: The gamma parameter as defined above (2D TensorFlow tensor).
  """
  def __init__(self,
               inverse=False,
               beta_min=1e-6,
               gamma_init=.1,
               reparam_offset=2 ** -18,
               data_format='channels_last',
               trainable=True,
               name=None,
               **kwargs):
    super(GDN, self).__init__(trainable=trainable, name=name, **kwargs)
    self.inverse = inverse
    self._beta_min = beta_min
    self._gamma_init = gamma_init
    self._reparam_offset = reparam_offset
    self.data_format = data_format
    self._channel_axis()  # trigger ValueError early
    self.input_spec = base.InputSpec(min_ndim=3, max_ndim=5)

  def _channel_axis(self):
    try:
      return {'channels_first': 1, 'channels_last': -1}[self.data_format]
    except KeyError:
      raise ValueError('Unsupported `data_format` for GDN layer: {}.'.format(
          self.data_format))

  @staticmethod
  def _lower_bound(inputs, bound, name=None):
    """Same as tf.maximum, but with helpful gradient for inputs < bound.

    The gradient is overwritten so that it is passed through if the input is
    not hitting the bound. If it is, only gradients that push `inputs` higher
    than the bound are passed through. No gradients are passed through to the
    bound.

    Args:
      inputs: input tensor
      bound: lower bound for the input tensor
      name: name for this op

    Returns:
      tf.maximum(inputs, bound)
    """
    with ops.name_scope(name, 'GDNLowerBound', [inputs, bound]) as scope:
      inputs = ops.convert_to_tensor(inputs, name='inputs')
      bound = ops.convert_to_tensor(bound, name='bound')
      with ops.get_default_graph().gradient_override_map(
          {'Maximum': 'GDNLowerBound'}):
        return math_ops.maximum(inputs, bound, name=scope)

  @ops.RegisterGradient('GDNLowerBound')
  @staticmethod
  def _lower_bound_grad(op, grad):
    """Gradient for `_lower_bound`.

    Args:
      op: the tensorflow op for which to calculate a gradient
      grad: gradient with respect to the output of the op

    Returns:
      gradients with respect to the inputs of the op
    """
    inputs = op.inputs[0]
    bound = op.inputs[1]
    pass_through_if = math_ops.logical_or(inputs >= bound, grad < 0)
    return [math_ops.cast(pass_through_if, grad.dtype) * grad, None]
  def build(self, input_shape):
    channel_axis = self._channel_axis()
    input_shape = tensor_shape.TensorShape(input_shape)
    num_channels = input_shape[channel_axis].value
    if num_channels is None:
      raise ValueError('The channel dimension of the inputs to `GDN` '
                       'must be defined.')
    self._input_rank = input_shape.ndims
    self.input_spec = base.InputSpec(ndim=input_shape.ndims,
                                     axes={channel_axis: num_channels})

    pedestal = array_ops.constant(self._reparam_offset ** 2, dtype=self.dtype)
    beta_bound = array_ops.constant(
        (self._beta_min + self._reparam_offset ** 2) ** .5, dtype=self.dtype)
    gamma_bound = array_ops.constant(self._reparam_offset, dtype=self.dtype)

    def beta_initializer(shape, dtype=None, partition_info=None):
      del partition_info  # unused
      return math_ops.sqrt(array_ops.ones(shape, dtype=dtype) + pedestal)

    def gamma_initializer(shape, dtype=None, partition_info=None):
      del partition_info  # unused
      assert len(shape) == 2
      assert shape[0] == shape[1]
      eye = linalg_ops.eye(shape[0], dtype=dtype)
      return math_ops.sqrt(self._gamma_init * eye + pedestal)

    beta = self.add_variable('reparam_beta',
                             shape=[num_channels],
                             initializer=beta_initializer,
                             dtype=self.dtype,
                             trainable=True)
    beta = self._lower_bound(beta, beta_bound)
    self.beta = math_ops.square(beta) - pedestal

    gamma = self.add_variable('reparam_gamma',
                              shape=[num_channels, num_channels],
                              initializer=gamma_initializer,
                              dtype=self.dtype,
                              trainable=True)
    gamma = self._lower_bound(gamma, gamma_bound)
    self.gamma = math_ops.square(gamma) - pedestal

    self.built = True

  def call(self, inputs):
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    ndim = self._input_rank

    shape = self.gamma.get_shape().as_list()
    gamma = array_ops.reshape(self.gamma, (ndim - 2) * [1] + shape)

    # Compute normalization pool.
    if self.data_format == 'channels_first':
      norm_pool = nn.convolution(math_ops.square(inputs), gamma, 'VALID',
                                 data_format='NC' + 'DHW'[-(ndim - 2):])
      if ndim == 3:
        norm_pool = array_ops.expand_dims(norm_pool, 2)
        norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NCHW')
        norm_pool = array_ops.squeeze(norm_pool, [2])
      elif ndim == 5:
        shape = array_ops.shape(norm_pool)
        norm_pool = array_ops.reshape(norm_pool, shape[:3] + [-1])
        norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NCHW')
        norm_pool = array_ops.reshape(norm_pool, shape)
      else:  # ndim == 4
        norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NCHW')
    else:  # channels_last
      norm_pool = nn.convolution(math_ops.square(inputs), gamma, 'VALID')
      norm_pool = nn.bias_add(norm_pool, self.beta, data_format='NHWC')
    norm_pool = math_ops.sqrt(norm_pool)

    if self.inverse:
      outputs = inputs * norm_pool
    else:
      outputs = inputs / norm_pool
    outputs.set_shape(inputs.get_shape())
    return outputs
  def _compute_output_shape(self, input_shape):
    channel_axis = self._channel_axis()
    input_shape = tensor_shape.TensorShape(input_shape)
    if not 3 <= input_shape.ndim <= 5:
      raise ValueError('`input_shape` must be of rank 3 to 5, inclusive.')
    if input_shape[channel_axis].value is None:
      raise ValueError(
          'The channel dimension of `input_shape` must be defined.')
    return input_shape


def gdn(inputs,
        inverse=False,
        beta_min=1e-6,
        gamma_init=.1,
        reparam_offset=2 ** -18,
        data_format='channels_last',
        trainable=True,
        name=None,
        reuse=None):
  """Functional interface for GDN layer.

  Based on the papers:

    "Density Modeling of Images using a Generalized Normalization
    Transformation"
    Johannes Ballé, Valero Laparra, Eero P. Simoncelli
    https://arxiv.org/abs/1511.06281

    "End-to-end Optimized Image Compression"
    Johannes Ballé, Valero Laparra, Eero P. Simoncelli
    https://arxiv.org/abs/1611.01704

  Implements an activation function that is essentially a multivariate
  generalization of a particular sigmoid-type function:

  y[i] = x[i] / sqrt(beta[i] + sum_j(gamma[j, i] * x[j]))

  where i and j run over channels. This implementation never sums across
  spatial dimensions. It is similar to local response normalization, but more
  powerful, as beta and gamma are trainable parameters.

  Arguments:
    inputs: Tensor input.
    inverse: If False (default), compute GDN response. If True, compute IGDN
      response (one step of fixed point iteration to invert GDN; the division
      is replaced by multiplication).
    beta_min: Lower bound for beta, to prevent numerical error from causing
      square root of zero or negative values.
    gamma_init: The gamma matrix will be initialized as the identity matrix
      multiplied with this value. If set to zero, the layer is effectively
      initialized to the identity operation, since beta is initialized as one.
      A good default setting is somewhere between 0 and 0.5.
    reparam_offset: Offset added to the reparameterization of beta and gamma.
      The reparameterization of beta and gamma as their square roots lets the
      training slow down when their values are close to zero, which is
      desirable as small values in the denominator can lead to a situation
      where gradient noise on beta/gamma leads to extreme amounts of noise in
      the GDN activations. However, without the offset, we would get zero
      gradients if any elements of beta or gamma were exactly zero, and thus
      the training could get stuck. To prevent this, we add this small
      constant. The default value was empirically determined as a good
      starting point. Making it bigger potentially leads to more gradient
      noise on the activations, making it too small may lead to numerical
      precision issues.
    data_format: Format of input tensor. Currently supports 'channels_first'
      and 'channels_last'.
    trainable: Boolean, if `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: String, the name of the layer. Layers with the same name will
      share weights, but to avoid mistakes we require reuse=True in such cases.
    reuse: Boolean, whether to reuse the weights of a previous layer
      by the same name.

  Returns:
    Output tensor.
  """
  layer = GDN(inverse=inverse,
              beta_min=beta_min,
              gamma_init=gamma_init,
              reparam_offset=reparam_offset,
              data_format=data_format,
              trainable=trainable,
              name=name,
              dtype=inputs.dtype.base_dtype,
              _scope=name,
              _reuse=reuse)
  return layer.apply(inputs)


@add_arg_scope
def max_pool2d(inputs,
               kernel_size,
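As a cross-check on the formula in the docstrings above, here is a small NumPy reference of the channels-last GDN/IGDN computation for fixed `beta` and `gamma`. This is an illustrative sketch only (not part of the commit); it mirrors the 1x1 convolution over squared inputs performed in the layer's `call()`:

```python
import numpy as np

def gdn_reference(x, beta, gamma, inverse=False):
  """Reference GDN for an NHWC array x; beta has shape [C], gamma [C, C]."""
  # norm_pool[..., i] = sqrt(beta[i] + sum_j gamma[j, i] * x[..., j]**2)
  norm_pool = np.sqrt(
      beta + np.tensordot(np.square(x), gamma, axes=([-1], [0])))
  return x * norm_pool if inverse else x / norm_pool

# Identity check mirroring gamma_init=0: beta=1, gamma=0 leaves x unchanged.
x = np.random.randn(2, 4, 4, 3).astype(np.float32)
out = gdn_reference(x, beta=np.ones(3), gamma=np.zeros((3, 3)))
assert np.allclose(out, x)
```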
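The `_lower_bound` helper in the diff relies on a custom gradient registered for `tf.maximum` via `gradient_override_map`. The following NumPy sketch (illustrative, with hypothetical names) restates the rule described in `_lower_bound_grad`: the incoming gradient passes through when the input is above the bound, or when it would push a clipped input back upward, and the bound itself receives no gradient:

```python
import numpy as np

def lower_bound_grad_reference(inputs, bound, grad):
  """Backward rule for y = maximum(inputs, bound), as in GDN's _lower_bound.

  Pass the gradient through if the input is not clipped (inputs >= bound), or
  if the update would move the input upward (grad < 0, since gradient descent
  subtracts the gradient).
  """
  pass_through = np.logical_or(inputs >= bound, grad < 0)
  return pass_through.astype(grad.dtype) * grad  # no gradient w.r.t. bound

# A clipped entry still receives gradient when the update would raise it.
inputs = np.array([0.5, -1.0, -1.0], dtype=np.float32)
grad = np.array([1.0, 1.0, -1.0], dtype=np.float32)
print(lower_bound_grad_reference(inputs, bound=0.0, grad=grad))
# -> [ 1.  0. -1.]
```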