Permanently remove several modules from tf.contrib.bayesflow.

These modules are very infrequently used and will not be developed moving forward.
Removing this code paves the way for remaining modules in tf.contrib.bayesflow
to move to their own repo.

PiperOrigin-RevId: 174110067
This commit is contained in:
Eugene Brevdo 2017-10-31 16:20:19 -07:00 committed by TensorFlower Gardener
parent ef7052fbd9
commit 2ccf3aba42
24 changed files with 11 additions and 3694 deletions

View File

@ -3,12 +3,15 @@
# particularly useful for Bayesian inference.
# APIs here are meant to evolve over time.
package(default_visibility = [
"//learning/brain/contrib/bayesflow:__subpackages__",
"//tensorflow:__subpackages__",
])
licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
package(default_visibility = ["//tensorflow:__subpackages__"])
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
py_library(
@ -100,44 +103,6 @@ cuda_py_test(
],
)
cuda_py_test(
name = "entropy_test",
size = "medium",
srcs = ["python/kernel_tests/entropy_test.py"],
additional_deps = [
":bayesflow_py",
"//third_party/py/numpy",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/contrib/layers:layers_py",
"//tensorflow/python/ops/distributions",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:math_ops",
"//tensorflow/python:nn_ops",
"//tensorflow/python:variables",
],
)
cuda_py_test(
name = "stochastic_variables_test",
size = "medium",
srcs = ["python/kernel_tests/stochastic_variables_test.py"],
additional_deps = [
":bayesflow_py",
"//third_party/py/numpy",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
"//tensorflow/python:random_ops",
"//tensorflow/python:variable_scope",
"//tensorflow/python:variables",
],
)
cuda_py_test(
name = "monte_carlo_test",
size = "small",
@ -180,88 +145,6 @@ cuda_py_test(
],
)
cuda_py_test(
name = "stochastic_graph_test",
size = "small",
srcs = ["python/kernel_tests/stochastic_graph_test.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:control_flow_ops",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:gradients",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
],
)
cuda_py_test(
name = "variational_inference_test",
size = "small",
srcs = ["python/kernel_tests/variational_inference_test.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/contrib/layers:layers_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
"//tensorflow/python:variables",
],
)
cuda_py_test(
name = "stochastic_tensor_test",
size = "small",
srcs = ["python/kernel_tests/stochastic_tensor_test.py"],
additional_deps = [
":bayesflow_py",
"//third_party/py/numpy",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:platform_test",
],
)
cuda_py_test(
name = "stochastic_gradient_estimators_test",
size = "medium",
srcs = ["python/kernel_tests/stochastic_gradient_estimators_test.py"],
additional_deps = [
":bayesflow_py",
"//third_party/py/numpy",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:gradients",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
"//tensorflow/python:variables",
],
)
cuda_py_test(
name = "reinforce_simple_example",
size = "small",
srcs = ["examples/reinforce_simple/reinforce_simple_example.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow:tensorflow_py",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:platform_test",
],
)
filegroup(
name = "all_files",
srcs = glob(

View File

@ -23,15 +23,9 @@ from __future__ import print_function
# pylint: disable=unused-import,line-too-long
from tensorflow.contrib.bayesflow.python.ops import csiszar_divergence
from tensorflow.contrib.bayesflow.python.ops import custom_grad
from tensorflow.contrib.bayesflow.python.ops import entropy
from tensorflow.contrib.bayesflow.python.ops import hmc
from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings
from tensorflow.contrib.bayesflow.python.ops import monte_carlo
from tensorflow.contrib.bayesflow.python.ops import stochastic_gradient_estimators
from tensorflow.contrib.bayesflow.python.ops import stochastic_graph
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor
from tensorflow.contrib.bayesflow.python.ops import stochastic_variables
from tensorflow.contrib.bayesflow.python.ops import variational_inference
# pylint: enable=unused-import,line-too-long
from tensorflow.python.util.all_util import remove_undocumented
@ -39,8 +33,6 @@ from tensorflow.python.util.all_util import remove_undocumented
_allowed_symbols = ['csiszar_divergence', 'custom_grad', 'entropy',
'metropolis_hastings', 'monte_carlo', 'hmc', 'special_math',
'stochastic_gradient_estimators', 'stochastic_graph',
'stochastic_tensor', 'stochastic_variables',
'variational_inference']
'stochastic_variables', 'variational_inference']
remove_undocumented(__name__, _allowed_symbols)

View File

@ -1,140 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Simple examples of the REINFORCE algorithm."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
distributions = tf.contrib.distributions
sg = tf.contrib.bayesflow.stochastic_graph
st = tf.contrib.bayesflow.stochastic_tensor
def split_apply_merge(inp, partitions, fns):
  """Route every element of `inp` through one of `fns` and reassemble.

  Args:
    inp: the input vector.
    partitions: tensor of the same length as `inp`; entry `i` selects which
      function in `fns` is applied to `inp[i]`.
    fns: sequence of functions, one per partition.

  Returns:
    The routed vector, where routed[i] = fns[partitions[i]](inp[i]).
  """
  num_routes = len(fns)
  # Split the input into one piece per route and transform each piece with
  # the function that owns that route.
  pieces = tf.dynamic_partition(inp, partitions, num_routes)
  transformed = [fn(piece) for fn, piece in zip(fns, pieces)]
  # Partition the original positions the same way so that the stitch below
  # puts every transformed element back where it came from.
  positions = tf.dynamic_partition(
      tf.range(0, inp.get_shape()[0]), partitions, num_routes)
  return tf.dynamic_stitch(positions, transformed)
def plus_1(inputs):
  """Return `inputs` shifted up by 1.0."""
  shifted = inputs + 1.0
  return shifted
def minus_1(inputs):
  """Return `inputs` shifted down by 1.0."""
  shifted = inputs - 1.0
  return shifted
def build_split_apply_merge_model():
"""Build the Split-Apply-Merge Model.
Route each value of input [-1, -1, 1, 1] through one of the
functions, plus_1, minus_1. The decision for routing is made by
4 Bernoulli R.V.s whose parameters are determined by a neural network
applied to the input. REINFORCE is used to update the NN parameters.
The routing decision is sampled from a two-way Categorical per input, and
the variables "w" and "b" are created with tf.get_variable.
Returns:
The 3-tuple (route_selection, routing_loss, final_loss), where:
- route_selection is an int 4-vector
- routing_loss is a float 4-vector
- final_loss is a float scalar.
"""
# Four scalar inputs stored as a (4, 1) column; every target is zero.
inputs = tf.constant([[-1.0], [-1.0], [1.0], [1.0]])
targets = tf.constant([[0.0], [0.0], [0.0], [0.0]])
# Route index 0 applies plus_1, route index 1 applies minus_1.
paths = [plus_1, minus_1]
# A single linear layer: (4, 1) x (1, 2) + (1, 1) gives two logits per input,
# one for each path.
weights = tf.get_variable("w", [1, 2])
bias = tf.get_variable("b", [1, 1])
logits = tf.matmul(inputs, weights) + bias
# REINFORCE forward step
route_selection = st.StochasticTensor(
distributions.Categorical(logits=logits))
# Accessing route_selection as a Tensor below forces a sample of
# the Categorical distribution based on its logits.
# This is equivalent to calling route_selection.value().
#
# route_selection.value() returns an int32 4-vector with random
# values in {0, 1}
# COPY+ROUTE+PASTE
outputs = split_apply_merge(inputs, route_selection, paths)
# flatten routing_loss to a row vector (from a column vector)
routing_loss = tf.reshape(tf.square(outputs - targets), shape=[-1])
# Total loss: score function loss + routing loss.
# The score function loss (through `route_selection.loss(routing_loss)`)
# returns:
# [stop_gradient(routing_loss) *
# route_selection.log_pmf(stop_gradient(route_selection.value()))],
# where log_pmf has gradients going all the way back to weights and bias.
# In this case, the routing_loss depends on the variables only through
# "route_selection", which has a stop_gradient on it. So the
# gradient of the loss really comes through the score function.
surrogate_loss = sg.surrogate_loss([routing_loss])
# Collapse the surrogate loss to the scalar that the optimizer minimizes.
final_loss = tf.reduce_sum(surrogate_loss)
return (route_selection, routing_loss, final_loss)
# End-to-end check that REINFORCE training of the split-apply-merge model
# learns the routing that drives the loss to zero.
class REINFORCESimpleExample(tf.test.TestCase):
def testSplitApplyMerge(self):
# Repeatability. SGD has a tendency to jump around, even here.
tf.set_random_seed(1)
with self.test_session() as sess:
# Use sampling to train REINFORCE
with st.value_type(st.SampleValue()):
(route_selection,
routing_loss,
final_loss) = build_split_apply_merge_model()
# Plain SGD with learning rate 1.0 on the surrogate (score function) loss.
sgd = tf.train.GradientDescentOptimizer(1.0).minimize(final_loss)
tf.global_variables_initializer().run()
for i in range(10):
# Run loss and inference step. This toy problem converges VERY quickly.
# All four fetches are evaluated in a single sess.run call, so the reported
# losses and the reported route selection come from the same run.
(routing_loss_v, final_loss_v, route_selection_v, _) = sess.run(
[routing_loss, final_loss, tf.identity(route_selection), sgd])
print(
"Iteration %d, routing loss: %s, final_loss: %s, "
"route selection: %s"
% (i, routing_loss_v, final_loss_v, route_selection_v))
# The routing that zeroes the loss sends the -1 inputs through path 0
# (plus_1) and the +1 inputs through path 1 (minus_1), so every output
# equals its zero target.
self.assertAllEqual([0, 0, 1, 1], route_selection_v)
self.assertAllClose([0.0, 0.0, 0.0, 0.0], routing_loss_v)
self.assertAllClose(0.0, final_loss_v)
# Run the test case when this file is executed as a script.
if __name__ == "__main__":
tf.test.main()

View File

@ -1,352 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Monte Carlo Ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.contrib import layers as layers_lib
from tensorflow.contrib.bayesflow.python.ops import entropy_impl as entropy
from tensorflow.contrib.distributions.python.ops import mvn_diag as mvn_diag_lib
from tensorflow.contrib.distributions.python.ops import mvn_tril as mvn_tril_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import variables
from tensorflow.python.ops.distributions import kullback_leibler as kullback_leibler_lib
from tensorflow.python.ops.distributions import normal as normal_lib
from tensorflow.python.ops.distributions import util as distribution_util
from tensorflow.python.platform import test
layers = layers_lib
class NormalNoEntropy(normal_lib.Normal):  # pylint: disable=no-init
  """Normal distribution without a `.entropy` method."""

  def entropy(self):
    """Fail like a distribution that has no analytic entropy.

    The exception must be raised, not returned: a caller that probes for an
    analytic entropy with `try: dist.entropy() except NotImplementedError`
    would otherwise receive an exception instance as if it were the entropy.

    Raises:
      NotImplementedError: always.
    """
    raise NotImplementedError('Entropy removed by gremlins')
def get_train_op(scalar_loss, optimizer='SGD', learning_rate=1.0, decay=0.0):
  """Build a training op for `scalar_loss` with a decaying learning rate.

  Args:
    scalar_loss: the loss handed to `layers.optimize_loss`.
    optimizer: optimizer specification forwarded to `layers.optimize_loss`.
    learning_rate: initial learning rate.
    decay: exponent of the decay schedule; the rate at step `t` is
      `learning_rate * (1 + t)**(-decay)`.

  Returns:
    The op returned by `layers.optimize_loss`.
  """
  step_counter = variables.Variable(0)

  def _decayed_rate(rate, t):
    # Polynomial decay in the (float) step count.
    steps_elapsed = 1 + math_ops.to_float(t)
    return rate * steps_elapsed**(-decay)

  return layers.optimize_loss(
      scalar_loss,
      step_counter,
      learning_rate,
      optimizer,
      learning_rate_decay_fn=_decayed_rate)
def _assert_monotonic_decreasing(array, atol=1e-5):
  """Assert the flattened `array` never rises by `atol` or more.

  Implemented by negating the values and reusing the increasing check.
  """
  negated = -np.asarray(array)
  _assert_monotonic_increasing(negated, atol=atol)
def _assert_monotonic_increasing(array, atol=1e-5):
  """Assert the flattened `array` never drops by `atol` or more.

  Every consecutive difference must be strictly greater than `-atol`;
  otherwise `np.testing.assert_array_less` raises an AssertionError.
  """
  steps = np.diff(np.asarray(array).ravel())
  np.testing.assert_array_less(-1 * atol, steps)
class ElboRatioTest(test.TestCase):
"""Show sampling converges to true KL values."""
def setUp(self):
# Fixed NumPy seed so the randomly drawn distribution parameters are the
# same on every run.
self._rng = np.random.RandomState(0)
def test_convergence_to_kl_using_sample_form_on_3dim_normal(self):
# Test that the sample mean KL is the same as analytic when we use samples
# to estimate every part of the KL divergence ratio.
# Parameters of shape (2, 3); the result is asserted to have shape (2,).
vector_shape = (2, 3)
n_samples = 5000
with self.test_session():
q = mvn_diag_lib.MultivariateNormalDiag(
loc=self._rng.rand(*vector_shape),
scale_diag=self._rng.rand(*vector_shape))
p = mvn_diag_lib.MultivariateNormalDiag(
loc=self._rng.rand(*vector_shape),
scale_diag=self._rng.rand(*vector_shape))
# In this case, the log_ratio is the KL.
# The sign is flipped so the estimate is compared against kl_divergence(q, p).
sample_kl = -1 * entropy.elbo_ratio(
log_p=p.log_prob,
q=q,
n=n_samples,
form=entropy.ELBOForms.sample,
seed=42)
actual_kl = kullback_leibler_lib.kl_divergence(q, p)
# Relative tolerance (rtol) chosen 2 times as large as minimum needed to
# pass.
self.assertEqual((2,), sample_kl.get_shape())
self.assertAllClose(actual_kl.eval(), sample_kl.eval(), rtol=0.05)
def test_convergence_to_kl_using_analytic_entropy_form_on_3dim_normal(self):
# Test that the sample mean KL is the same as analytic when we use an
# analytic entropy combined with sampled cross-entropy.
n_samples = 5000
vector_shape = (2, 3)
with self.test_session():
q = mvn_diag_lib.MultivariateNormalDiag(
loc=self._rng.rand(*vector_shape),
scale_diag=self._rng.rand(*vector_shape))
p = mvn_diag_lib.MultivariateNormalDiag(
loc=self._rng.rand(*vector_shape),
scale_diag=self._rng.rand(*vector_shape))
# In this case, the log_ratio is the KL.
sample_kl = -1 * entropy.elbo_ratio(
log_p=p.log_prob,
q=q,
n=n_samples,
form=entropy.ELBOForms.analytic_entropy,
seed=42)
actual_kl = kullback_leibler_lib.kl_divergence(q, p)
# Relative tolerance (rtol) chosen 2 times as large as minimum needed to
# pass.
self.assertEqual((2,), sample_kl.get_shape())
self.assertAllClose(actual_kl.eval(), sample_kl.eval(), rtol=0.1)
def test_sample_kl_zero_when_p_and_q_are_the_same_distribution(self):
n_samples = 50
vector_shape = (2, 3)
with self.test_session():
q = mvn_diag_lib.MultivariateNormalDiag(
loc=self._rng.rand(*vector_shape),
scale_diag=self._rng.rand(*vector_shape))
# In this case, the log_ratio is the KL.
# log_p is q's own log_prob, so the estimate is asserted to be zero
# (within assertAllClose's default tolerance).
sample_kl = -1 * entropy.elbo_ratio(
log_p=q.log_prob,
q=q,
n=n_samples,
form=entropy.ELBOForms.sample,
seed=42)
self.assertEqual((2,), sample_kl.get_shape())
self.assertAllClose(np.zeros(2), sample_kl.eval())
# Checks entropy_shannon against the analytic entropy of a Normal for the
# default, analytic_entropy and sample forms.
class EntropyShannonTest(test.TestCase):
def test_normal_entropy_default_form_uses_exact_entropy(self):
with self.test_session():
dist = normal_lib.Normal(loc=1.11, scale=2.22)
# No `form` argument: exercises the default form.
mc_entropy = entropy.entropy_shannon(dist, n=11)
exact_entropy = dist.entropy()
self.assertEqual(exact_entropy.get_shape(), mc_entropy.get_shape())
self.assertAllClose(exact_entropy.eval(), mc_entropy.eval())
def test_normal_entropy_analytic_form_uses_exact_entropy(self):
with self.test_session():
dist = normal_lib.Normal(loc=1.11, scale=2.22)
mc_entropy = entropy.entropy_shannon(
dist, form=entropy.ELBOForms.analytic_entropy)
exact_entropy = dist.entropy()
self.assertEqual(exact_entropy.get_shape(), mc_entropy.get_shape())
self.assertAllClose(exact_entropy.eval(), mc_entropy.eval())
def test_normal_entropy_sample_form_gets_approximate_answer(self):
# Tested by showing we get a good answer that is not exact.
with self.test_session():
dist = normal_lib.Normal(loc=1.11, scale=2.22)
mc_entropy = entropy.entropy_shannon(
dist, n=1000, form=entropy.ELBOForms.sample, seed=0)
exact_entropy = dist.entropy()
self.assertEqual(exact_entropy.get_shape(), mc_entropy.get_shape())
# Relative tolerance (rtol) chosen 2 times as large as minimum needed to
# pass.
self.assertAllClose(exact_entropy.eval(), mc_entropy.eval(), rtol=0.01)
# Make sure there is some error, proving we used samples
self.assertLess(0.0001, math_ops.abs(exact_entropy - mc_entropy).eval())
def test_default_entropy_falls_back_on_sample_if_analytic_not_available(self):
# Tested by showing we get a good answer that is not exact.
with self.test_session():
# NormalNoEntropy is like a Normal, but does not have .entropy method, so
# we are forced to fall back on sample entropy.
dist_no_entropy = NormalNoEntropy(loc=1.11, scale=2.22)
dist_yes_entropy = normal_lib.Normal(loc=1.11, scale=2.22)
# NOTE(review): despite the test name, `form` is passed explicitly as
# ELBOForms.sample here, so the default-form fallback path is not what is
# exercised -- confirm whether that is intended.
mc_entropy = entropy.entropy_shannon(
dist_no_entropy, n=1000, form=entropy.ELBOForms.sample, seed=0)
exact_entropy = dist_yes_entropy.entropy()
self.assertEqual(exact_entropy.get_shape(), mc_entropy.get_shape())
# Relative tolerance (rtol) chosen 2 times as large as minimum needed to
# pass.
self.assertAllClose(exact_entropy.eval(), mc_entropy.eval(), rtol=0.01)
# Make sure there is some error, proving we used samples
self.assertLess(0.0001, math_ops.abs(exact_entropy - mc_entropy).eval())
class RenyiRatioTest(test.TestCase):
"""Show renyi_ratio is minimized when the distributions match."""
def setUp(self):
# Fixed NumPy seed so the randomly drawn distribution parameters are the
# same on every run.
self._rng = np.random.RandomState(0)
def test_fitting_two_dimensional_normal_n_equals_1000(self):
# Minimizing Renyi divergence should allow us to make one normal match
# another one exactly.
n = 1000
mu_true = np.array([1.0, -1.0], dtype=np.float64)
chol_true = np.array([[2.0, 0.0], [0.5, 1.0]], dtype=np.float64)
with self.test_session() as sess:
target = mvn_tril_lib.MultivariateNormalTriL(mu_true, chol_true)
# Set up q distribution by defining mean/covariance as Variables
mu = variables.Variable(
np.zeros(mu_true.shape), dtype=mu_true.dtype, name='mu')
mat = variables.Variable(
np.zeros(chol_true.shape), dtype=chol_true.dtype, name='mat')
# softplus is applied to the diagonal of `mat` (presumably to keep the
# scale's diagonal positive).
chol = distribution_util.matrix_diag_transform(
mat, transform=nn_ops.softplus)
q = mvn_tril_lib.MultivariateNormalTriL(mu, chol)
for alpha in [0.25, 0.75]:
negative_renyi_divergence = entropy.renyi_ratio(
log_p=target.log_prob, q=q, n=n, alpha=alpha, seed=0)
# The train op minimizes the mean of -renyi_ratio, i.e. it maximizes
# renyi_ratio itself.
train_op = get_train_op(
math_ops.reduce_mean(-negative_renyi_divergence),
optimizer='SGD',
learning_rate=0.5,
decay=0.1)
variables.global_variables_initializer().run()
renyis = []
for step in range(1000):
sess.run(train_op)
# Record the objective at a few steps to check the trend below.
if step in [1, 5, 100]:
renyis.append(negative_renyi_divergence.eval())
# This optimization should increase renyi_ratio (the negative Renyi
# divergence), so the recorded values must be increasing.
_assert_monotonic_increasing(renyis, atol=0)
# Relative tolerance (rtol) chosen 2 times as large as minimum needed to
# pass.
self.assertAllClose(target.loc.eval(), q.loc.eval(), rtol=0.06)
self.assertAllClose(target.scale.to_dense().eval(),
q.scale.to_dense().eval(),
rtol=0.1)
def test_divergence_between_identical_distributions_is_zero(self):
n = 1000
vector_shape = (2, 3)
with self.test_session():
q = mvn_diag_lib.MultivariateNormalDiag(
loc=self._rng.rand(*vector_shape),
scale_diag=self._rng.rand(*vector_shape))
for alpha in [0.25, 0.75]:
# log_p is q's own log_prob, so the ratio is asserted to be zero.
negative_renyi_divergence = entropy.renyi_ratio(
log_p=q.log_prob, q=q, n=n, alpha=alpha, seed=0)
self.assertEqual((2,), negative_renyi_divergence.get_shape())
self.assertAllClose(np.zeros(2), negative_renyi_divergence.eval())
# Checks the renyi_alpha schedule: its end points, its monotonicity, and its
# argument validation.
class RenyiAlphaTest(test.TestCase):
def test_with_three_alphas(self):
with self.test_session():
for dtype in (dtypes.float32, dtypes.float64):
# alpha_min is a Tensor of the dtype under test; alpha_max is a Python float.
alpha_min = constant_op.constant(0.0, dtype=dtype)
alpha_max = 0.5
decay_time = 3
# Evaluate the schedule at steps 0, 1, 2 and 3 (== decay_time).
alpha_0 = entropy.renyi_alpha(
0, decay_time, alpha_min=alpha_min, alpha_max=alpha_max)
alpha_1 = entropy.renyi_alpha(
1, decay_time, alpha_min=alpha_min, alpha_max=alpha_max)
alpha_2 = entropy.renyi_alpha(
2, decay_time, alpha_min=alpha_min, alpha_max=alpha_max)
alpha_3 = entropy.renyi_alpha(
3, decay_time, alpha_min=alpha_min, alpha_max=alpha_max)
# Alpha should start at alpha_max.
self.assertAllClose(alpha_max, alpha_0.eval(), atol=1e-5)
# Alpha should finish at alpha_min.
self.assertAllClose(alpha_min.eval(), alpha_3.eval(), atol=1e-5)
# In between, alpha should be monotonically decreasing.
_assert_monotonic_decreasing(
[alpha_0.eval(), alpha_1.eval(), alpha_2.eval(), alpha_3.eval()])
def test_non_scalar_input_raises(self):
with self.test_session():
# Good values here
step = 0
alpha_min = 0.0
alpha_max = 0.5
decay_time = 3
# Use one bad value inside each check.
# The "bad" value is always the non-scalar one.
with self.assertRaisesRegexp(ValueError, 'must be scalar'):
entropy.renyi_alpha(
[step], decay_time, alpha_min=alpha_min, alpha_max=alpha_max).eval()
with self.assertRaisesRegexp(ValueError, 'must be scalar'):
entropy.renyi_alpha(
step, [decay_time], alpha_min=alpha_min, alpha_max=alpha_max).eval()
with self.assertRaisesRegexp(ValueError, 'must be scalar'):
entropy.renyi_alpha(
step, decay_time, alpha_min=[alpha_min], alpha_max=alpha_max).eval()
with self.assertRaisesRegexp(ValueError, 'must be scalar'):
entropy.renyi_alpha(
step, decay_time, alpha_min=alpha_min, alpha_max=[alpha_max]).eval()
def test_input_with_wrong_sign_raises(self):
with self.test_session():
# Good values here
step = 0
alpha_min = 0.0
alpha_max = 0.5
decay_time = 3
# Use one bad value inside each check.
# The "bad" value is always the out-of-range one: a decay_time of 0.0 or a
# step of -1.
with self.assertRaisesOpError('decay_time must be positive'):
entropy.renyi_alpha(
step, 0.0, alpha_min=alpha_min, alpha_max=alpha_max).eval()
with self.assertRaisesOpError('step must be non-negative'):
entropy.renyi_alpha(
-1, decay_time, alpha_min=alpha_min, alpha_max=alpha_max).eval()
# Run the test cases when this file is executed as a script.
if __name__ == '__main__':
test.main()

View File

@ -1,206 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for stochastic graphs."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.contrib import distributions
from tensorflow.contrib.bayesflow.python.ops import stochastic_gradient_estimators
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
st = stochastic_tensor
sge = stochastic_gradient_estimators
dists = distributions
def _vimco(loss):
  """Reference NumPy implementation of the VIMCO advantage.

  For each sample `j` (leading axis of `loss`) the local learning signal is
  the log of the sample mean in which sample `j`'s own loss has been replaced
  by the geometric mean of the other samples' losses.

  Args:
    loss: array of at least two dimensions whose leading axis indexes the
      samples.

  Returns:
    Array shaped like `loss`: the log of the mean loss over samples minus
    the local learning signal of each sample.
  """
  num_samples = loss.shape[0]
  log_loss = np.log(loss)

  def all_but(rows, skipped):
    # Every row of `rows` except the one at index `skipped`.
    return [rows[i, :] for i in range(num_samples) if i != skipped]

  # Geometric mean of the other samples == exp(mean of their logs).
  geometric_mean = np.array([
      np.exp(np.mean(all_but(log_loss, j), 0)) for j in range(num_samples)])
  # Sum of the other samples' losses.
  learning_signal = np.array([
      np.sum(all_but(loss, j), 0) for j in range(num_samples)])
  local_learning_signal = np.log(
      1 / num_samples * (learning_signal + geometric_mean))
  # log_mean - local_learning_signal
  return np.log(np.mean(loss, 0)) - local_learning_signal
# Tests for the score-function loss helpers and the VIMCO advantage function
# in stochastic_gradient_estimators (aliased as `sge`).
class StochasticGradientEstimatorsTest(test.TestCase):
def setUp(self):
# p is so close to 1 that a Bernoulli(p) sample is presumably always 1, which
# is what lets the tests below use log(p) as the expected log-probability.
self._p = constant_op.constant(0.999999)
self._final_loss = constant_op.constant(3.2)
def _testScoreFunction(self, loss_fn, expected):
# Build a Bernoulli StochasticTensor that uses `loss_fn`, then check that its
# loss for self._final_loss evaluates to `expected`.
x = st.StochasticTensor(dists.Bernoulli(probs=self._p), loss_fn=loss_fn)
sf = x.loss(self._final_loss)
with self.test_session() as sess:
sess.run(variables.global_variables_initializer())
self.assertAllClose(*sess.run([expected, sf]))
def testScoreFunction(self):
expected = math_ops.log(self._p) * self._final_loss
self._testScoreFunction(sge.score_function, expected)
def testScoreFunctionWithConstantBaseline(self):
b = constant_op.constant(9.8)
# The baseline is subtracted from the loss before scaling by log(p).
expected = math_ops.log(self._p) * (self._final_loss - b)
self._testScoreFunction(
sge.get_score_function_with_constant_baseline(b), expected)
def testScoreFunctionWithBaselineFn(self):
b = constant_op.constant(9.8)
def baseline_fn(stoch_tensor, loss):
# Also verifies the types of the arguments handed to the baseline callback.
self.assertTrue(isinstance(stoch_tensor, st.StochasticTensor))
self.assertTrue(isinstance(loss, ops.Tensor))
return b
expected = math_ops.log(self._p) * (self._final_loss - b)
self._testScoreFunction(
sge.get_score_function_with_baseline(baseline_fn), expected)
def testScoreFunctionWithMeanBaseline(self):
ema_decay = 0.8
num_steps = 6
x = st.StochasticTensor(
dists.Bernoulli(probs=self._p),
loss_fn=sge.get_score_function_with_baseline(
sge.get_mean_baseline(ema_decay)))
sf = x.loss(self._final_loss)
# Expected EMA value
ema = 0.
for _ in range(num_steps):
ema -= (1. - ema_decay) * (ema - self._final_loss)
# Baseline is EMA with bias correction
bias_correction = 1. - ema_decay**num_steps
baseline = ema / bias_correction
expected = math_ops.log(self._p) * (self._final_loss - baseline)
with self.test_session() as sess:
sess.run(variables.global_variables_initializer())
# num_steps - 1 warm-up runs plus the asserted run below make num_steps
# evaluations of `sf` in total.
for _ in range(num_steps - 1):
sess.run(sf) # run to update EMA
self.assertAllClose(*sess.run([expected, sf]))
def testScoreFunctionWithAdvantageFn(self):
b = constant_op.constant(9.8)
def advantage_fn(stoch_tensor, loss):
# Also verifies the types of the arguments handed to the advantage callback.
self.assertTrue(isinstance(stoch_tensor, st.StochasticTensor))
self.assertTrue(isinstance(loss, ops.Tensor))
return loss - b
expected = math_ops.log(self._p) * (self._final_loss - b)
self._testScoreFunction(
sge.get_score_function_with_advantage(advantage_fn), expected)
def testVIMCOAdvantageFn(self):
# simple_loss: (3, 2) with 3 samples, batch size 2
simple_loss = np.array(
[[1.0, 1.5],
[1e-6, 1e4],
[2.0, 3.0]])
# random_loss: (100, 50, 64) with 100 samples, batch shape (50, 64)
# NOTE(review): drawn from the unseeded global NumPy RNG, so this input
# differs between runs.
random_loss = 100 * np.random.rand(100, 50, 64)
advantage_fn = sge.get_vimco_advantage_fn(have_log_loss=False)
with self.test_session() as sess:
for loss in [simple_loss, random_loss]:
# Compare the TF op against the NumPy reference implementation.
expected = _vimco(loss)
loss_t = constant_op.constant(loss, dtype=dtypes.float32)
advantage_t = advantage_fn(None, loss_t) # ST is not used
advantage = sess.run(advantage_t)
self.assertEqual(expected.shape, advantage_t.get_shape())
self.assertAllClose(expected, advantage, atol=5e-5)
def testVIMCOAdvantageGradients(self):
# The losses are passed in log space, matching have_log_loss=True below.
loss = np.log(
[[1.0, 1.5],
[1e-6, 1e4],
[2.0, 3.0]])
advantage_fn = sge.get_vimco_advantage_fn(have_log_loss=True)
with self.test_session():
loss_t = constant_op.constant(loss, dtype=dtypes.float64)
advantage_t = advantage_fn(None, loss_t) # ST is not used
# The error reported by gradient_checker for d(advantage)/d(loss) must be
# below 1e-3.
gradient_error = gradient_checker.compute_gradient_error(
loss_t,
loss_t.get_shape().as_list(),
advantage_t,
advantage_t.get_shape().as_list(),
x_init_value=loss)
self.assertLess(gradient_error, 1e-3)
def testVIMCOAdvantageWithSmallProbabilities(self):
theta_value = np.random.rand(10, 100000)
# Test with float16 dtype to ensure stability even in this extreme case.
theta = constant_op.constant(theta_value, dtype=dtypes.float16)
advantage_fn = sge.get_vimco_advantage_fn(have_log_loss=True)
with self.test_session() as sess:
# Summing 100000 values from [0, 1) and negating gives very negative
# log-losses, i.e. extremely small losses.
log_loss = -math_ops.reduce_sum(theta, [1])
advantage_t = advantage_fn(None, log_loss)
grad_t = gradients_impl.gradients(advantage_t, theta)[0]
advantage, grad = sess.run((advantage_t, grad_t))
# Neither the advantage nor its gradient may contain inf or nan.
self.assertTrue(np.all(np.isfinite(advantage)))
self.assertTrue(np.all(np.isfinite(grad)))
def testScoreFunctionWithMeanBaselineHasUniqueVarScope(self):
ema_decay = 0.8
# Two independent StochasticTensors, each with its own mean baseline; building
# and running both in one graph must work.
x = st.StochasticTensor(
dists.Bernoulli(probs=self._p),
loss_fn=sge.get_score_function_with_baseline(
sge.get_mean_baseline(ema_decay)))
y = st.StochasticTensor(
dists.Bernoulli(probs=self._p),
loss_fn=sge.get_score_function_with_baseline(
sge.get_mean_baseline(ema_decay)))
sf_x = x.loss(self._final_loss)
sf_y = y.loss(self._final_loss)
with self.test_session() as sess:
# Smoke test
sess.run(variables.global_variables_initializer())
sess.run([sf_x, sf_y])
# Run the test cases when this file is executed as a script.
if __name__ == "__main__":
test.main()

View File

@ -1,246 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for stochastic graphs."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib import distributions as distributions_lib
from tensorflow.contrib.bayesflow.python.ops import stochastic_graph_impl
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test
st = stochastic_tensor
sg = stochastic_graph_impl
distributions = distributions_lib
class NormalNotParam(distributions.Normal):
  """Normal distribution that reports itself as not reparameterized."""

  @property
  def reparameterization_type(self):
    """Always `distributions.NOT_REPARAMETERIZED`."""
    return distributions.NOT_REPARAMETERIZED
class TestSurrogateLosses(test.TestCase):
# Checks that surrogate_loss adds nothing to the losses when every
# StochasticTensor involved is fully reparameterized.
def testPathwiseDerivativeDoesNotAddSurrogateLosses(self):
with self.test_session():
mu = [0.0, 0.1, 0.2]
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.SampleValue()):
prior = st.StochasticTensor(distributions.Normal(loc=mu, scale=sigma))
# The likelihood is centered on the sampled prior.
likelihood = st.StochasticTensor(
distributions.Normal(
loc=prior, scale=sigma))
# Precondition for this test: both distributions are fully reparameterized.
self.assertEqual(
prior.distribution.reparameterization_type,
distributions.FULLY_REPARAMETERIZED)
self.assertEqual(
likelihood.distribution.reparameterization_type,
distributions.FULLY_REPARAMETERIZED)
loss = math_ops.square(array_ops.identity(likelihood) - [0.0, 0.1, 0.2])
sum_loss = math_ops.reduce_sum(loss)
surrogate_loss = sg.surrogate_loss([loss])
# A scalar loss is rejected by surrogate_loss.
with self.assertRaisesRegexp(ValueError, "dimensionality 1 or greater"):
_ = sg.surrogate_loss([sum_loss])
# Broadcasting the scalar to the shape of `loss` makes it acceptable.
surrogate_from_both = sg.surrogate_loss(
[loss, sum_loss * array_ops.ones_like(loss)])
# Pathwise derivative terms do not require add'l surrogate loss terms.
# Each pair is fetched in a single sess.run call, so both sides of every
# comparison see the same samples.
with self.test_session() as sess:
self.assertAllClose(*sess.run([loss, surrogate_loss]))
self.assertAllClose(*sess.run([(loss + sum_loss), surrogate_from_both]))
def _testSurrogateLoss(self, session, losses, expected_addl_terms, xs):
surrogate_loss = sg.surrogate_loss(losses)
expected_surrogate_loss = math_ops.add_n(losses + expected_addl_terms)
self.assertAllClose(*session.run([surrogate_loss, expected_surrogate_loss]))
# Test backprop
expected_grads = gradients_impl.gradients(ys=expected_surrogate_loss, xs=xs)
surrogate_grads = gradients_impl.gradients(ys=surrogate_loss, xs=xs)
self.assertEqual(len(expected_grads), len(surrogate_grads))
grad_values = session.run(expected_grads + surrogate_grads)
n_grad = len(expected_grads)
self.assertAllClose(grad_values[:n_grad], grad_values[n_grad:])
def testSurrogateLoss(self):
with self.test_session() as sess:
mu = constant_op.constant([0.0, 0.1, 0.2])
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.SampleValue()):
prior = st.StochasticTensor(NormalNotParam(loc=mu, scale=sigma))
likelihood = st.StochasticTensor(NormalNotParam(loc=prior, scale=sigma))
prior_2 = st.StochasticTensor(NormalNotParam(loc=mu, scale=sigma))
loss = math_ops.square(array_ops.identity(likelihood) - mu)
part_loss = math_ops.square(array_ops.identity(prior) - mu)
sum_loss = math_ops.reduce_sum(loss)
loss_nodeps = math_ops.square(array_ops.identity(prior_2) - mu)
# For ground truth, use the stop-gradient versions of the losses
loss_nograd = array_ops.stop_gradient(loss)
loss_nodeps_nograd = array_ops.stop_gradient(loss_nodeps)
sum_loss_nograd = array_ops.stop_gradient(sum_loss)
# These score functions should ignore prior_2
self._testSurrogateLoss(
session=sess,
losses=[loss],
expected_addl_terms=[
likelihood.distribution.log_prob(
likelihood.value()) * loss_nograd,
prior.distribution.log_prob(prior.value()) * loss_nograd
],
xs=[mu, sigma])
self._testSurrogateLoss(
session=sess,
losses=[loss, part_loss],
expected_addl_terms=[
likelihood.distribution.log_prob(
likelihood.value()) * loss_nograd,
(prior.distribution.log_prob(prior.value()) *
array_ops.stop_gradient(part_loss + loss))
],
xs=[mu, sigma])
self._testSurrogateLoss(
session=sess,
losses=[sum_loss * array_ops.ones_like(loss)],
expected_addl_terms=[(
likelihood.distribution.log_prob(likelihood.value()) *
sum_loss_nograd), prior.distribution.log_prob(prior.value()) *
sum_loss_nograd],
xs=[mu, sigma])
self._testSurrogateLoss(
session=sess,
losses=[loss, sum_loss * array_ops.ones_like(loss)],
expected_addl_terms=[(
likelihood.distribution.log_prob(likelihood.value()) *
array_ops.stop_gradient(loss + sum_loss)),
(prior.distribution.log_prob(prior.value()) *
array_ops.stop_gradient(loss + sum_loss))],
xs=[mu, sigma])
# These score functions should ignore prior and likelihood
self._testSurrogateLoss(
session=sess,
losses=[loss_nodeps],
expected_addl_terms=[(prior_2.distribution.log_prob(prior_2.value()) *
loss_nodeps_nograd)],
xs=[mu, sigma])
# These score functions should include all terms selectively
self._testSurrogateLoss(
session=sess,
losses=[loss, loss_nodeps],
# We can't guarantee ordering of output losses in this case.
expected_addl_terms=[(
likelihood.distribution.log_prob(likelihood.value()) *
loss_nograd), prior.distribution.log_prob(prior.value()) *
loss_nograd,
(prior_2.distribution.log_prob(prior_2.value()) *
loss_nodeps_nograd)],
xs=[mu, sigma])
def testNoSurrogateLoss(self):
with self.test_session():
mu = constant_op.constant([0.0, 0.1, 0.2])
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.SampleValue()):
dt = st.StochasticTensor(
NormalNotParam(
loc=mu, scale=sigma), loss_fn=None)
self.assertEqual(None, dt.loss(constant_op.constant([2.0])))
def testExplicitStochasticTensors(self):
with self.test_session() as sess:
mu = constant_op.constant([0.0, 0.1, 0.2])
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.SampleValue()):
dt1 = st.StochasticTensor(NormalNotParam(loc=mu, scale=sigma))
dt2 = st.StochasticTensor(NormalNotParam(loc=mu, scale=sigma))
loss = math_ops.square(array_ops.identity(dt1)) + 10. + dt2
sl_all = sg.surrogate_loss([loss])
sl_dt1 = sg.surrogate_loss([loss], stochastic_tensors=[dt1])
sl_dt2 = sg.surrogate_loss([loss], stochastic_tensors=[dt2])
dt1_term = dt1.distribution.log_prob(dt1) * loss
dt2_term = dt2.distribution.log_prob(dt2) * loss
self.assertAllClose(*sess.run(
[sl_all, sum([loss, dt1_term, dt2_term])]))
self.assertAllClose(*sess.run([sl_dt1, sum([loss, dt1_term])]))
self.assertAllClose(*sess.run([sl_dt2, sum([loss, dt2_term])]))
class StochasticDependenciesMapTest(test.TestCase):
  """Tests for `sg._stochastic_dependencies_map`."""

  def testBuildsMapOfUpstreamNodes(self):
    """Each stochastic tensor maps to the outputs computed from it."""
    dt1 = st.StochasticTensor(distributions.Normal(loc=0., scale=1.))
    dt2 = st.StochasticTensor(distributions.Normal(loc=0., scale=1.))
    shifted_1 = dt1.value() + 1.
    shifted_2 = dt2.value() + 2.
    x = shifted_1 + shifted_2
    y = shifted_2 * 3.
    upstream = sg._stochastic_dependencies_map([x, y])
    self.assertEqual(upstream[dt1], {x})
    self.assertEqual(upstream[dt2], {x, y})

  def testHandlesStackedStochasticNodes(self):
    """A tensor feeding another stochastic tensor is still found."""
    dt1 = st.StochasticTensor(distributions.Normal(loc=0., scale=1.))
    shifted_1 = dt1.value() + 1.
    dt2 = st.StochasticTensor(distributions.Normal(loc=shifted_1, scale=1.))
    x = dt2.value() + 2.
    dt3 = st.StochasticTensor(distributions.Normal(loc=0., scale=1.))
    y = dt3.value() * 3.
    upstream = sg._stochastic_dependencies_map([x, y])
    self.assertEqual(upstream[dt1], {x})
    self.assertEqual(upstream[dt2], {x})
    self.assertEqual(upstream[dt3], {y})

  def testTraversesControlInputs(self):
    """Tensors that reach the output only through `cond` are found too."""
    dt1 = st.StochasticTensor(distributions.Normal(loc=0., scale=1.))
    logits = dt1.value() * 3.
    dt2 = st.StochasticTensor(distributions.Bernoulli(logits=logits))
    dt3 = st.StochasticTensor(distributions.Normal(loc=0., scale=1.))
    x = dt3.value()
    fours = array_ops.ones((2, 2)) * 4.
    threes = array_ops.ones((2, 2)) * 3.
    predicate = math_ops.cast(dt2, dtypes.bool)
    out = control_flow_ops.cond(
        predicate,
        lambda: math_ops.add(x, fours),
        lambda: math_ops.square(threes))
    out = out + 5.
    upstream = sg._stochastic_dependencies_map([out])
    for stochastic in (dt1, dt2, dt3):
      self.assertEqual(upstream[stochastic], {out})
if __name__ == "__main__":
  # Hand control to the TensorFlow test runner when executed as a script.
  test.main()

View File

@ -1,239 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for stochastic tensors."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.contrib.bayesflow.python.ops import stochastic_gradient_estimators
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor_impl
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.distributions import normal
from tensorflow.python.platform import test
# Short module aliases used by the tests in this file.
sge = stochastic_gradient_estimators
st = stochastic_tensor_impl
class StochasticTensorTest(test.TestCase):
  """Tests construction, value types and losses of `st.StochasticTensor`."""

  def testConstructionAndValue(self):
    """Tensors default to `SampleValue` and register in the collection."""
    with self.test_session() as sess:
      mu = [0.0, 0.1, 0.2]
      sigma = constant_op.constant([1.1, 1.2, 1.3])
      sigma2 = constant_op.constant([0.1, 0.2, 0.3])

      prior_default = st.StochasticTensor(
          normal.Normal(loc=mu, scale=sigma))
      self.assertIsInstance(prior_default.value_type, st.SampleValue)
      prior_0 = st.StochasticTensor(
          normal.Normal(loc=mu, scale=sigma),
          dist_value_type=st.SampleValue())
      self.assertIsInstance(prior_0.value_type, st.SampleValue)

      with st.value_type(st.SampleValue()):
        prior = st.StochasticTensor(normal.Normal(loc=mu, scale=sigma))
        self.assertIsInstance(prior.value_type, st.SampleValue)
        likelihood = st.StochasticTensor(
            normal.Normal(loc=prior, scale=sigma2))
        self.assertIsInstance(likelihood.value_type, st.SampleValue)

      coll = ops.get_collection(st.STOCHASTIC_TENSOR_COLLECTION)
      self.assertEqual(coll, [prior_default, prior_0, prior, likelihood])

      # Also works: tf.convert_to_tensor(prior)
      prior_default = array_ops.identity(prior_default)
      prior_0 = array_ops.identity(prior_0)
      prior = array_ops.identity(prior)
      likelihood = array_ops.identity(likelihood)

      # Mostly a smoke test for now...
      prior_0_val, prior_val, prior_default_val, _ = sess.run(
          [prior_0, prior, prior_default, likelihood])

      self.assertEqual(prior_0_val.shape, prior_val.shape)
      self.assertEqual(prior_default_val.shape, prior_val.shape)
      # These are different random samples from the same distribution,
      # so the values should differ.
      self.assertGreater(np.abs(prior_0_val - prior_val).sum(), 1e-6)
      self.assertGreater(np.abs(prior_default_val - prior_val).sum(), 1e-6)

  def testMeanValue(self):
    """With `MeanValue`, the tensor's value equals the distribution mean."""
    with self.test_session() as sess:
      mu = [0.0, -1.0, 1.0]
      sigma = constant_op.constant([1.1, 1.2, 1.3])

      with st.value_type(st.MeanValue()):
        prior = st.StochasticTensor(normal.Normal(loc=mu, scale=sigma))
        self.assertIsInstance(prior.value_type, st.MeanValue)

      prior_mean = prior.mean()
      prior_value = prior.value()

      prior_mean_val, prior_value_val = sess.run([prior_mean, prior_value])
      self.assertAllEqual(prior_mean_val, mu)
      self.assertAllEqual(prior_mean_val, prior_value_val)

  def testSampleValueScalar(self):
    """`SampleValue(k)` prepends a sample dimension of size `k`."""
    with self.test_session() as sess:
      mu = [[0.0, -1.0, 1.0], [0.0, -1.0, 1.0]]
      sigma = constant_op.constant([[1.1, 1.2, 1.3], [1.1, 1.2, 1.3]])

      # No sample count: the value has the batch shape only.
      with st.value_type(st.SampleValue()):
        prior_single = st.StochasticTensor(
            normal.Normal(loc=mu, scale=sigma))

      prior_single_value = prior_single.value()
      self.assertEqual(prior_single_value.get_shape(), (2, 3))

      prior_single_value_val = sess.run([prior_single_value])[0]
      self.assertEqual(prior_single_value_val.shape, (2, 3))

      with st.value_type(st.SampleValue(1)):
        prior_single = st.StochasticTensor(
            normal.Normal(loc=mu, scale=sigma))
        self.assertIsInstance(prior_single.value_type, st.SampleValue)

      prior_single_value = prior_single.value()
      self.assertEqual(prior_single_value.get_shape(), (1, 2, 3))

      prior_single_value_val = sess.run([prior_single_value])[0]
      self.assertEqual(prior_single_value_val.shape, (1, 2, 3))

      with st.value_type(st.SampleValue(2)):
        prior_double = st.StochasticTensor(
            normal.Normal(loc=mu, scale=sigma))

      prior_double_value = prior_double.value()
      self.assertEqual(prior_double_value.get_shape(), (2, 2, 3))

      prior_double_value_val = sess.run([prior_double_value])[0]
      self.assertEqual(prior_double_value_val.shape, (2, 2, 3))

  def testDistributionEntropy(self):
    """`entropy()` matches the wrapped distribution's entropy."""
    with self.test_session() as sess:
      mu = [0.0, -1.0, 1.0]
      sigma = constant_op.constant([1.1, 1.2, 1.3])
      with st.value_type(st.MeanValue()):
        prior = st.StochasticTensor(normal.Normal(loc=mu, scale=sigma))
        entropy = prior.entropy()
        deep_entropy = prior.distribution.entropy()
        expected_deep_entropy = normal.Normal(
            loc=mu, scale=sigma).entropy()
        entropies = sess.run([entropy, deep_entropy, expected_deep_entropy])
        self.assertAllEqual(entropies[2], entropies[0])
        self.assertAllEqual(entropies[1], entropies[0])

  def testSurrogateLoss(self):
    """`loss()` scales `log_prob` by the sample loss minus any baseline."""
    with self.test_session():
      mu = [[3.0, -4.0, 5.0], [6.0, -7.0, 8.0]]
      sigma = constant_op.constant(1.0)

      # With default
      with st.value_type(st.MeanValue(stop_gradient=True)):
        dt = st.StochasticTensor(normal.Normal(loc=mu, scale=sigma))
      loss = dt.loss([constant_op.constant(2.0)])
      self.assertIsNotNone(loss)
      self.assertAllClose(
          dt.distribution.log_prob(mu).eval() * 2.0, loss.eval())

      # With passed-in loss_fn.
      dt = st.StochasticTensor(
          normal.Normal(loc=mu, scale=sigma),
          dist_value_type=st.MeanValue(stop_gradient=True),
          loss_fn=sge.get_score_function_with_constant_baseline(
              baseline=constant_op.constant(8.0)))
      loss = dt.loss([constant_op.constant(2.0)])
      self.assertIsNotNone(loss)
      self.assertAllClose((dt.distribution.log_prob(mu) * (2.0 - 8.0)).eval(),
                          loss.eval())
class ValueTypeTest(test.TestCase):
  """Tests the `st.value_type` context manager stack."""

  def testValueType(self):
    """Nested scopes set, restore and finally clear the current value type."""
    outer = st.MeanValue()
    first_inner = st.SampleValue()
    second_inner = st.SampleValue()

    with st.value_type(outer):
      self.assertEqual(st.get_current_value_type(), outer)
      # Each inner scope overrides the outer one only while it is active.
      for inner in (first_inner, second_inner):
        with st.value_type(inner):
          self.assertEqual(st.get_current_value_type(), inner)
      self.assertEqual(st.get_current_value_type(), outer)

    # Outside every scope there is no value type to report.
    with self.assertRaisesRegexp(ValueError, "No value type currently set"):
      st.get_current_value_type()
class ObservedStochasticTensorTest(test.TestCase):
  """Tests for `st.ObservedStochasticTensor`."""

  def testConstructionAndValue(self):
    """The tensor's value is the observation and it joins the collection."""
    with self.test_session() as sess:
      mu = [0.0, 0.1, 0.2]
      sigma = constant_op.constant([1.1, 1.2, 1.3])
      obs = array_ops.zeros((2, 3))
      observed = st.ObservedStochasticTensor(
          normal.Normal(loc=mu, scale=sigma), value=obs)
      obs_val, observed_val = sess.run([obs, observed.value()])
      self.assertAllEqual(obs_val, observed_val)

      registered = ops.get_collection(st.STOCHASTIC_TENSOR_COLLECTION)
      self.assertEqual(registered, [observed])

  def testConstructionWithUnknownShapes(self):
    """Construction succeeds for placeholders without a full static shape."""
    mu = array_ops.placeholder(dtypes.float32)
    sigma = array_ops.placeholder(dtypes.float32)
    obs = array_ops.placeholder(dtypes.float32)
    z = st.ObservedStochasticTensor(
        normal.Normal(loc=mu, scale=sigma), value=obs)

    mu2 = array_ops.placeholder(dtypes.float32, shape=[None])
    sigma2 = array_ops.placeholder(dtypes.float32, shape=[None])
    obs2 = array_ops.placeholder(dtypes.float32, shape=[None, None])
    z2 = st.ObservedStochasticTensor(
        normal.Normal(loc=mu2, scale=sigma2), value=obs2)

    registered = ops.get_collection(st.STOCHASTIC_TENSOR_COLLECTION)
    self.assertEqual(registered, [z, z2])

  def testConstructionErrors(self):
    """Each listed observation is rejected with `ValueError`."""
    mu = [0., 0.]
    sigma = [1., 1.]
    # (shape, extra kwargs for `zeros`) of each rejected observation.
    bad_observations = [
        ((3,), {}),
        ((3, 1), {}),
        ((1, 2), {"dtype": dtypes.int32}),
    ]
    for shape, zeros_kwargs in bad_observations:
      # Build the arguments first so that only the constructor call itself
      # is expected to raise.
      dist = normal.Normal(loc=mu, scale=sigma)
      value = array_ops.zeros(shape, **zeros_kwargs)
      with self.assertRaises(ValueError):
        st.ObservedStochasticTensor(dist, value=value)
if __name__ == "__main__":
  # Hand control to the TensorFlow test runner when executed as a script.
  test.main()

View File

@ -1,168 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for stochastic variables."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.contrib import distributions
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor
from tensorflow.contrib.bayesflow.python.ops import stochastic_variables
from tensorflow.contrib.bayesflow.python.ops import variational_inference_impl
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
# Short module aliases used by the tests in this file.
sv = stochastic_variables
st = stochastic_tensor
vi = variational_inference_impl
dist = distributions
class StochasticVariablesTest(test.TestCase):
  """Tests for the getter from `sv.make_stochastic_variable_getter`."""

  def _find_loc_and_scale_variables(self):
    """Returns the global variables whose names contain "loc" and "scale".

    When several variables match, the last one in
    `variables.global_variables()` is returned for each name.
    """
    loc_var = None
    scale_var = None
    for var in variables.global_variables():
      if "loc" in var.name:
        loc_var = var
      if "scale" in var.name:
        scale_var = var
    # Fail with a clear message instead of a NameError if either is missing.
    self.assertIsNotNone(loc_var)
    self.assertIsNotNone(scale_var)
    return loc_var, scale_var

  def testStochasticVariables(self):
    """`get_variable` yields a StochasticTensor backed by two variables."""
    shape = (10, 20)
    with variable_scope.variable_scope(
        "stochastic_variables",
        custom_getter=sv.make_stochastic_variable_getter(
            dist_cls=dist.NormalWithSoftplusScale)):
      v = variable_scope.get_variable("sv", shape)

    self.assertIsInstance(v, st.StochasticTensor)
    self.assertIsInstance(v.distribution, dist.NormalWithSoftplusScale)

    # The loop variable must not be called `v`: on Python 2 a list
    # comprehension leaks its variable and would overwrite the tensor under
    # test with the last global variable.
    self.assertEqual(
        {"stochastic_variables/sv_loc", "stochastic_variables/sv_scale"},
        {var.op.name for var in variables.global_variables()})
    self.assertEqual(
        set(variables.trainable_variables()), set(variables.global_variables()))

    v = ops.convert_to_tensor(v)
    self.assertEqual(list(shape), v.get_shape().as_list())
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      self.assertEqual(shape, sess.run(v).shape)

  def testStochasticVariablesWithConstantInitializer(self):
    """Array initializers set the parameter values and fix the shape."""
    shape = (10, 20)
    with variable_scope.variable_scope(
        "stochastic_variables",
        custom_getter=sv.make_stochastic_variable_getter(
            dist_cls=dist.NormalWithSoftplusScale,
            dist_kwargs={"validate_args": True},
            param_initializers={
                "loc": np.ones(shape) * 4.,
                "scale": np.ones(shape) * 2.
            })):
      v = variable_scope.get_variable("sv")

    mu_var, sigma_var = self._find_loc_and_scale_variables()

    v = ops.convert_to_tensor(v)
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      self.assertAllEqual(np.ones(shape) * 4., sess.run(mu_var))
      self.assertAllEqual(np.ones(shape) * 2., sess.run(sigma_var))
      self.assertEqual(shape, sess.run(v).shape)

  def testStochasticVariablesWithCallableInitializer(self):
    """A callable initializer can be mixed with an array initializer."""
    shape = (10, 20)

    def sigma_init(shape, dtype, partition_info):
      _ = partition_info
      return array_ops.ones(shape, dtype=dtype) * 2.

    with variable_scope.variable_scope(
        "stochastic_variables",
        custom_getter=sv.make_stochastic_variable_getter(
            dist_cls=dist.NormalWithSoftplusScale,
            dist_kwargs={"validate_args": True},
            param_initializers={
                "loc": np.ones(shape, dtype=np.float32) * 4.,
                "scale": sigma_init
            })):
      v = variable_scope.get_variable("sv", shape)

    mu_var, sigma_var = self._find_loc_and_scale_variables()

    v = ops.convert_to_tensor(v)
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      self.assertAllEqual(np.ones(shape) * 4., sess.run(mu_var))
      self.assertAllEqual(np.ones(shape) * 2., sess.run(sigma_var))
      self.assertEqual(shape, sess.run(v).shape)

  def testStochasticVariablesWithPrior(self):
    """A prior passed to the getter is found for the created variable."""
    shape = (10, 20)
    prior = dist.Normal(0., 1.)
    with variable_scope.variable_scope(
        "stochastic_variables",
        custom_getter=sv.make_stochastic_variable_getter(
            dist_cls=dist.NormalWithSoftplusScale, prior=prior)):
      w = variable_scope.get_variable("weights", shape)

    x = random_ops.random_uniform((8, 10))
    y = math_ops.matmul(x, w)

    prior_map = vi._find_variational_and_priors(y, None)
    self.assertEqual(prior_map[w], prior)
    elbo = vi.elbo(y, keep_batch_dim=False)
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(elbo)

  def testStochasticVariablesWithCallablePriorInitializer(self):
    """A callable prior is invoked to build the registered prior."""

    def prior_init(shape, dtype):
      return dist.Normal(
          array_ops.zeros(shape, dtype), array_ops.ones(shape, dtype))

    with variable_scope.variable_scope(
        "stochastic_variables",
        custom_getter=sv.make_stochastic_variable_getter(
            dist_cls=dist.NormalWithSoftplusScale, prior=prior_init)):
      w = variable_scope.get_variable("weights", (10, 20))

    x = random_ops.random_uniform((8, 10))
    y = math_ops.matmul(x, w)

    prior_map = vi._find_variational_and_priors(y, None)
    self.assertIsInstance(prior_map[w], dist.Normal)
    elbo = vi.elbo(y, keep_batch_dim=False)
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(elbo)
if __name__ == "__main__":
  # Hand control to the TensorFlow test runner when executed as a script.
  test.main()

View File

@ -1,146 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for variational inference."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib import distributions as distributions_lib
from tensorflow.contrib import layers
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor
from tensorflow.contrib.bayesflow.python.ops import variational_inference_impl
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variables
from tensorflow.python.ops.distributions import kullback_leibler
from tensorflow.python.ops.distributions import normal
from tensorflow.python.platform import test
# Short module aliases used by the tests in this file.
st = stochastic_tensor
vi = variational_inference_impl
distributions = distributions_lib
class NormalNoEntropy(distributions.Normal):
  """A `Normal` whose `entropy()` is deliberately unavailable."""

  def entropy(self):
    """Always raises `NotImplementedError`."""
    raise NotImplementedError("entropy not implemented")
# For mini-VAE
def inference_net(x, latent_size):
  """Maps `x` through one linear layer with `latent_size` outputs."""
  latent = layers.linear(x, latent_size)
  return latent
def generative_net(z, data_size):
  """Maps `z` through one linear layer with `data_size` outputs."""
  reconstruction = layers.linear(z, data_size)
  return reconstruction
def mini_vae():
  """Builds a tiny VAE graph for the tests in this file.

  Returns:
    Tuple `(x, prior, variational, px, log_likelihood)`: the fixed data, the
    standard-normal prior, the variational `StochasticTensor` registered with
    that prior, the likelihood distribution, and the log likelihood summed
    over axis 1 with a trailing dimension of size 1 appended.
  """
  data = [[-6., 3., 6.], [-8., 4., 8.]]
  latent_prior = distributions.Normal(loc=0., scale=1.)

  encoder_mean = inference_net(data, 1)
  posterior = st.StochasticTensor(
      distributions.Normal(loc=encoder_mean, scale=1.))
  vi.register_prior(posterior, latent_prior)

  decoder_mean = generative_net(posterior, 3)
  likelihood_dist = distributions.Normal(loc=decoder_mean, scale=1.)
  summed_log_prob = math_ops.reduce_sum(likelihood_dist.log_prob(data), 1)
  log_likelihood = array_ops.expand_dims(summed_log_prob, -1)
  return data, latent_prior, posterior, likelihood_dist, log_likelihood
class VariationalInferenceTest(test.TestCase):
  """Tests for `vi.elbo` and `vi.elbo_with_log_joint`."""

  def testDefaultVariationalAndPrior(self):
    """With no explicit map, the registered prior is used for the KL term."""
    _, prior, variational, _, log_likelihood = mini_vae()
    elbo = vi.elbo(log_likelihood)
    kl = kullback_leibler.kl_divergence(variational.distribution, prior)
    expected_elbo = log_likelihood - kl
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      expected_val, elbo_val = sess.run([expected_elbo, elbo])
      self.assertAllEqual(expected_val, elbo_val)

  def testExplicitVariationalAndPrior(self):
    """An explicit `variational_with_prior` map supplies the prior."""
    with self.test_session() as sess:
      _, _, variational, _, log_likelihood = mini_vae()
      prior = normal.Normal(loc=3., scale=2.)
      elbo = vi.elbo(
          log_likelihood, variational_with_prior={variational: prior})
      kl = kullback_leibler.kl_divergence(variational.distribution, prior)
      expected_elbo = log_likelihood - kl
      sess.run(variables.global_variables_initializer())
      expected_val, elbo_val = sess.run([expected_elbo, elbo])
      self.assertAllEqual(expected_val, elbo_val)

  def testExplicitForms(self):
    """Every ELBO form evaluates and keeps the log likelihood's shape."""
    _, prior, variational, _, log_likelihood = mini_vae()

    forms = vi.ELBOForms
    requested_forms = [
        forms.default, forms.analytic_kl, forms.sample, forms.analytic_entropy
    ]
    elbos = [
        vi.elbo(
            log_likelihood=log_likelihood,
            variational_with_prior={variational: prior},
            form=form) for form in requested_forms
    ]

    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      log_likelihood_shape = array_ops.shape(log_likelihood).eval()
      for elbo in elbos:
        # Evaluate once as a smoke test before checking shape and dtype.
        elbo.eval()
        elbo_shape = array_ops.shape(elbo).eval()
        self.assertAllEqual(log_likelihood_shape, elbo_shape)
        self.assertEqual(elbo.dtype, log_likelihood.dtype)

  def testDefaultsSampleKLWithoutAnalyticKLOrEntropy(self):
    """Without analytic KL or entropy, the sampled ELBO is used."""
    x = constant_op.constant([[-6., 3., 6.]])

    prior = distributions.Bernoulli(0.5)
    encoder_mean = inference_net(x, 1)
    variational = st.StochasticTensor(
        NormalNoEntropy(loc=encoder_mean, scale=1.))
    vi.register_prior(variational, prior)
    px = distributions.Normal(loc=generative_net(variational, 3), scale=1.)
    log_likelihood = math_ops.reduce_sum(px.log_prob(x), 1)

    # No analytic KL available between prior and variational distributions.
    with self.assertRaisesRegexp(NotImplementedError, "No KL"):
      distributions.kl_divergence(variational.distribution, prior)

    elbo = vi.elbo(
        variational_with_prior={variational: prior},
        log_likelihood=log_likelihood)
    expected_elbo = log_likelihood + prior.log_prob(
        variational) - variational.distribution.log_prob(variational)

    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      expected_val, elbo_val = sess.run([expected_elbo, elbo])
      self.assertAllEqual(expected_val, elbo_val)

  def testElboWithLogJoint(self):
    """Smoke test: `elbo_with_log_joint` builds and evaluates."""
    with self.test_session() as sess:
      _, prior, variational, _, log_likelihood = mini_vae()
      log_prior = prior.log_prob(variational)
      log_joint = log_likelihood + log_prior
      elbo = vi.elbo_with_log_joint(log_joint)
      sess.run(variables.global_variables_initializer())
      elbo.eval()
if __name__ == "__main__":
  # Hand control to the TensorFlow test runner when executed as a script.
  test.main()

View File

@ -1,31 +0,0 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Support for Entropy Ops. See ${python/contrib.bayesflow.entropy}."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.contrib.bayesflow.python.ops.entropy_impl import *
# pylint: enable=wildcard-import
from tensorflow.python.util.all_util import remove_undocumented
# Names this module intends to keep as its public API.
_allowed_symbols = [
'ELBOForms', 'elbo_ratio', 'entropy_shannon', 'renyi_ratio', 'renyi_alpha'
]
# NOTE(review): presumably strips module attributes that are not listed in
# `_allowed_symbols` -- confirm against `all_util.remove_undocumented`.
remove_undocumented(__name__, _allowed_symbols)

View File

@ -1,386 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Support for Entropy Ops. See ${python/contrib.bayesflow.entropy}.
@@elbo_ratio
@@entropy_shannon
@@renyi_ratio
@@renyi_alpha
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from tensorflow.contrib.bayesflow.python.ops import monte_carlo_impl as monte_carlo
from tensorflow.contrib.bayesflow.python.ops import variational_inference
from tensorflow.contrib.bayesflow.python.ops.monte_carlo_impl import _get_samples as get_samples
from tensorflow.python.framework import ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import tf_logging as logging
# Make utility functions from monte_carlo available.
# pylint: disable=protected-access
_get_samples = get_samples
_logspace_mean = monte_carlo._logspace_mean
_sample_mean = monte_carlo._sample_mean
# pylint: enable=protected-access

# `ELBOForms` is exported because the functions in this module take its
# members as their `form` argument; without it a wildcard import of this
# module would not bring `ELBOForms` along.
__all__ = [
    'ELBOForms',
    'elbo_ratio',
    'entropy_shannon',
    'renyi_ratio',
    'renyi_alpha',
]

# Re-use the enum of ELBO forms defined by the variational_inference module.
ELBOForms = variational_inference.ELBOForms  # pylint: disable=invalid-name
def elbo_ratio(log_p,
               q,
               z=None,
               n=None,
               seed=None,
               form=None,
               name='elbo_ratio'):
  r"""Estimate of the ratio appearing in the `ELBO` and `KL` divergence.

  With `p(z) := exp{log_p(z)}`, this `Op` returns an approximation of

  ```
  E_q[ Log[p(Z) / q(Z)] ]
  ```

  The term `E_q[ Log[p(Z)] ]` is always computed as a sample mean.
  The term `E_q[ Log[q(z)] ]` can be computed with samples, or an exact formula
  if `q.entropy()` is defined.  This is controlled with the kwarg `form`.

  This log-ratio appears in different contexts:

  #### `KL[q || p]`

  If `log_p(z) = Log[p(z)]` for distribution `p`, this `Op` approximates
  the negative Kullback-Leibler divergence.

  ```
  elbo_ratio(log_p, q, n=100) = -1 * KL[q || p],
  KL[q || p] = E[ Log[q(Z)] - Log[p(Z)] ]
  ```

  Note that if `p` is a `Distribution`, then
  `distributions.kl_divergence(q, p)` may be defined and available as an
  exact result.

  #### ELBO

  If `log_p(z) = Log[p(z, x)]` is the log joint of a distribution `p`, this is
  the Evidence Lower BOund (ELBO):

  ```
  ELBO ~= E[ Log[p(Z, x)] - Log[q(Z)] ]
        = Log[p(x)] - KL[q || p]
       <= Log[p(x)]
  ```

  User supplies either `Tensor` of samples `z`, or number of samples to draw `n`

  Args:
    log_p:  Callable mapping samples from `q` to `Tensors` with
      shape broadcastable to `q.batch_shape`.
      For example, `log_p` works "just like" `q.log_prob`.
    q:  `tf.contrib.distributions.Distribution`.
    z:  `Tensor` of samples from `q`, produced by `q.sample(n)` for some `n`.
    n:  Integer `Tensor`.  Number of samples to generate if `z` is not provided.
    seed:  Python integer to seed the random number generator.
    form:  Either `ELBOForms.analytic_entropy` (use formula for entropy of `q`)
      or `ELBOForms.sample` (sample estimate of entropy), or `ELBOForms.default`
      (attempt analytic entropy, fallback on sample).
      Default value is `ELBOForms.default`.
    name:  A name to give this `Op`.

  Returns:
    `Tensor` holding the estimate of `E_q[ Log[p(Z) / q(Z)] ]`, i.e. the
    entropy of `q` plus the sample mean of `log_p`.  `shape` is the batch
    shape of `q`, and `dtype` is the same as `q`.

  Raises:
    ValueError:  If `form` is not handled by this function.
  """
  if form is None:
    form = ELBOForms.default

  with ops.name_scope(name, values=[n, z]):
    samples = _get_samples(q, z, n, seed)

    # Entropy of q, analytic or sampled depending on `form`.
    q_entropy = entropy_shannon(q, z=samples, form=form)

    # If log_p(z) = Log[p(z)], cross entropy = -E_q[log(p(Z))]
    expected_log_p = _sample_mean(log_p(samples))

    return q_entropy + expected_log_p
def entropy_shannon(p,
                    z=None,
                    n=None,
                    seed=None,
                    form=None,
                    name='entropy_shannon'):
  r"""Monte Carlo or deterministic computation of Shannon's entropy.

  Depending on the kwarg `form`, this `Op` returns either the analytic entropy
  of the distribution `p`, or the sampled entropy:

  ```
  -n^{-1} sum_{i=1}^n p.log_prob(z_i),  where z_i ~ p,
      \approx - E_p[ Log[p(Z)] ]
      = Entropy[p]
  ```

  User supplies either `Tensor` of samples `z`, or number of samples to draw `n`

  Args:
    p:  `tf.contrib.distributions.Distribution`
    z:  `Tensor` of samples from `p`, produced by `p.sample(n)` for some `n`.
    n:  Integer `Tensor`.  Number of samples to generate if `z` is not provided.
    seed:  Python integer to seed the random number generator.
    form:  Either `ELBOForms.analytic_entropy` (use formula for entropy of `p`)
      or `ELBOForms.sample` (sample estimate of entropy), or `ELBOForms.default`
      (attempt analytic entropy, fallback on sample).
      Default value is `ELBOForms.default`.
    name:  A name to give this `Op`.

  Returns:
    A `Tensor` with same `dtype` as `p`, and shape equal to `p.batch_shape`.

  Raises:
    ValueError:  If `form` not handled by this function.
    ValueError:  If `form` is `ELBOForms.analytic_entropy` and `n` was provided.
    NotImplementedError:  If `form` is `ELBOForms.analytic_entropy` and
      `p.entropy()` is not implemented.
  """
  form = ELBOForms.default if form is None else form

  if n is not None and form == ELBOForms.analytic_entropy:
    raise ValueError('If form == ELBOForms.analytic_entropy, n must be None.')

  with ops.name_scope(name, values=[n, z]):
    # Entropy: -E_p[log(p(Z))].
    entropy = None

    # Try analytic path
    if form in [ELBOForms.default, ELBOForms.analytic_entropy]:
      try:
        entropy = p.entropy()
      except NotImplementedError:
        # Only an explicit request for the analytic form is an error; the
        # default form falls through to the sample path below.  A bare
        # `raise` keeps the original traceback.
        if form == ELBOForms.analytic_entropy:
          raise
      else:
        logging.info('Using analytic entropy(p:%s)', p)
    elif form != ELBOForms.sample:
      raise ValueError('ELBOForm not handled by this function: %s' % form)

    # Sample path
    if entropy is None:
      logging.info('Using sampled entropy(p:%s)', p)
      if z is None:
        z = p.sample(n, seed=seed)
      entropy = -monte_carlo.expectation(p.log_prob, z)

    return entropy
def renyi_ratio(log_p, q, alpha, z=None, n=None, seed=None, name='renyi_ratio'):
  r"""Monte Carlo estimate of the ratio that defines the Renyi divergence.

  Useful both for the Renyi (alpha) divergence itself and for a log evidence
  approximation built on it.

  #### Definition

  Given iid samples `z_i` from `q` and `exp{log_p(z)} = p(z)`, the returned
  estimate (biased for finite `n`) is:

  ```
  (1 - alpha)^{-1} Log[ n^{-1} sum_{i=1}^n ( p(z_i) / q(z_i) )^{1 - alpha},
    \approx (1 - alpha)^{-1} Log[ E_q[ (p(Z) / q(Z))^{1 - alpha} ]  ]
  ```

  #### Renyi divergence

  When `log_p(z) = Log[p(z)]` is the log prob of a distribution and
  `alpha > 0`, `alpha != 1`, the result approximates `-1` times the Renyi
  divergence:

  ```
  # Choose reasonably high n to limit bias, see below.
  renyi_ratio(log_p, q, alpha, n=100)
    \approx -1 * D_alpha[q || p],  where
  D_alpha[q || p] := (1 - alpha)^{-1} Log E_q[(p(Z) / q(Z))^{1 - alpha}]
  ```

  The Renyi (or "alpha") divergence is non-negative and zero iff `q = p`.
  Special cases by `alpha`:

  ```
  alpha       D_alpha[q || p]
  -----       ---------------
  --> 0       Log[ int_{q > 0} p(z) dz ]
  = 0.5,      -2 Log[1 - Hel^2[q || p]],  (\propto squared Hellinger distance)
  --> 1       KL[q || p]
  = 2         Log[ 1 + chi^2[q || p] ],   (\propto squared Chi-2 divergence)
  --> infty   Log[ max_z{q(z) / p(z)} ],  (min description length principle).
  ```

  See "Renyi Divergence Variational Inference", by Li and Turner.

  #### Log evidence approximation

  When `log_p(z) = Log[p(z, x)]` is the log of the joint distribution `p`,
  the result is an alternative to the ELBO used in variational inference:

  ```
  L_alpha(q, p) = Log[p(x)] - D_alpha[q || p]
  ```

  If `q` and `p` have the same support, and `0 < a <= b < 1`, one can show
  `ELBO <= L_b <= L_a <= Log[p(x)]`, so this `Op` interpolates smoothly
  between the ELBO and the true evidence.

  #### Stability notes

  When `1 - alpha` is not small, `(p(z) / q(z))^{1 - alpha}` can underflow or
  overflow, which is why it is evaluated in log-space after centering.
  Infinite/NaN results may still occur; shrinking `alpha` gradually (see the
  `Op` `renyi_alpha`) and using `float64` both help.

  #### Bias for finite sample size

  Because the logarithm is nonlinear, for random variables `{X_1,...,X_n}`,
  `E[ Log[sum_{i=1}^n X_i] ] != Log[ E[sum_{i=1}^n X_i] ]`, so the estimate is
  biased for finite `n`.  For `alpha < 1` it is non-decreasing with `n` (in
  expectation): with `n = 1` it matches `elbo_ratio`, and its expected value
  grows as `n` increases.

  #### Call signature

  The caller supplies either a `Tensor` of samples `z`, or the number of
  samples `n` to draw.

  Args:
    log_p: Callable mapping samples from `q` to `Tensors` with
      shape broadcastable to `q.batch_shape`.
      For example, `log_p` works "just like" `q.log_prob`.
    q: `tf.contrib.distributions.Distribution`.
       `float64` `dtype` recommended.
       `log_p` and `q` should be supported on the same set.
    alpha: `Tensor` with shape `q.batch_shape` and values not equal to 1.
    z: `Tensor` of samples from `q`, produced by `q.sample` for some `n`.
    n: Integer `Tensor`.  The number of samples to use if `z` is not provided.
      Note that this can be highly biased for small `n`, see docstring.
    seed: Python integer to seed the random number generator.
    name: A name to give this `Op`.

  Returns:
    renyi_result: The scaled log of sample mean.  `Tensor` with `shape` equal
      to batch shape of `q`, and `dtype` = `q.dtype`.
  """
  with ops.name_scope(name, values=[alpha, n, z]):
    z = _get_samples(q, z, n, seed)

    # The mean of q.log_prob(z) that ends up inside _logspace_mean could be
    # replaced by the analytic q.entropy(), but DON'T: doing so increases
    # variance and can produce NaN/Inf in a sensitive term.
    #
    # log_values = (1 - alpha) * ( Log p - Log q )
    exponent = 1. - alpha
    log_ratio = log_p(z) - q.log_prob(z)
    log_values = exponent * log_ratio

    # Log[ E[ (p / q)^{1 - alpha} ] ], with the mean taken in log-space.
    log_mean_values = _logspace_mean(log_values)

    return log_mean_values / (1. - alpha)
def renyi_alpha(step,
                decay_time,
                alpha_min,
                alpha_max=0.99999,
                name='renyi_alpha'):
  r"""Schedule for `alpha` that moves from `alpha_max` down to `alpha_min`.

  Minimizing the Renyi divergence for `0 <= alpha < 1` (or maximizing the
  Renyi analogue of the elbo) in high dimensions frequently yields `NaN` and
  `inf` values when `alpha` is far from `1`.  A common remedy is to begin
  optimization with `alpha` very close to 1 and lower it to a final
  `alpha_min` on a schedule; one may even optimize with `elbo_ratio` for a
  fixed time before switching to Renyi based methods.

  The schedule implemented here is:

  ```
  s(step) = (exp{step / decay_time} - 1) / (e - 1)
  t(s) = max(0, min(s, 1)),  (smooth growth from 0 to 1)
  alpha(t) = (1 - t) alpha_max + t alpha_min
  ```

  Args:
    step:  Non-negative scalar `Tensor`.  Typically the global step or an
      offset version thereof.
    decay_time:  Positive scalar `Tensor`.
    alpha_min:  `float` or `double` `Tensor`.
      The minimal, final value of `alpha`, achieved when `step >= decay_time`
    alpha_max:  `Tensor` of same `dtype` as `alpha_min`.
      The maximal, beginning value of `alpha`, achieved when `step == 0`
    name:  A name to give this `Op`.

  Returns:
    alpha:  A `Tensor` of same `dtype` as `alpha_min`.
  """
  with ops.name_scope(name, values=[step, decay_time, alpha_min, alpha_max]):
    # Everything is computed in the dtype of alpha_min.
    alpha_min = ops.convert_to_tensor(alpha_min, name='alpha_min')
    float_dtype = alpha_min.dtype
    alpha_max = ops.convert_to_tensor(
        alpha_max, dtype=float_dtype, name='alpha_max')
    decay_time = math_ops.cast(decay_time, float_dtype)
    step = math_ops.cast(step, float_dtype)

    # Runtime checks: all four inputs are scalars, then the sign constraints.
    scalar_inputs = (
        (step, 'step must be scalar'),
        (decay_time, 'decay_time must be scalar'),
        (alpha_min, 'alpha_min must be scalar'),
        (alpha_max, 'alpha_max must be scalar'),
    )
    assertions = [
        check_ops.assert_rank(tensor, 0, message=msg)
        for tensor, msg in scalar_inputs
    ]
    assertions.append(
        check_ops.assert_non_negative(
            step, message='step must be non-negative'))
    assertions.append(
        check_ops.assert_positive(
            decay_time, message='decay_time must be positive'))

    with ops.control_dependencies(assertions):
      # Unclipped progress is 0 at step == 0 and 1 at step == decay_time.
      progress = (math_ops.exp(step / decay_time) - 1.) / (math.e - 1.)
      theta = math_ops.minimum(math_ops.maximum(progress, 0.), 1.)
      return alpha_max * (1. - theta) + alpha_min * theta

View File

@ -1,317 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Stochastic gradient estimators.
These functions are meant to be used in conjunction with `StochasticTensor`
(`loss_fn` parameter) and `surrogate_loss`.
See Gradient Estimation Using Stochastic Computation Graphs
(http://arxiv.org/abs/1506.05254) by Schulman et al., eq. 1 and section 4, for
mathematical details.
## Score function estimator
The score function is an unbiased estimator of the gradient of `E_p(x)[f(x)]`,
where `f(x)` can be considered to be a "loss" term. It is computed as
`E_p(x)[f(x) grad(log p(x))]`. A constant `b`, referred to here as the
"baseline", can be subtracted from `f(x)` without affecting the expectation. The
term `(f(x) - b)` is referred to here as the "advantage".
Note that the methods defined in this module actually compute the integrand of
the score function, such that when taking the gradient, the true score function
is computed.
@@score_function
@@get_score_function_with_baseline
@@get_score_function_with_constant_baseline
@@get_score_function_with_advantage
## Baseline functions
Baselines reduce the variance of a Monte Carlo estimate of an expectation. The
baseline for a stochastic node can be a function of all non-influenced nodes
(see section 4 of Schulman et al., linked above). Baselines are also known as
"control variates."
In the context of a MC estimate of `E_p(x)[f(x) - b]`, baseline functions have
the signature `(st, fx) => Tensor`, where `st` is a `StochasticTensor` backed by
the distribution `p(x)` and `fx` is the influenced loss.
@@get_mean_baseline
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.training import training
from tensorflow.python.util.all_util import make_all
def score_function(stochastic_tensor, value, loss, baseline=None,
                   name="ScoreFunction"):
  """Integrand of the score function estimator, with optional baseline.

  Builds `p.log_prob(value) * (loss - baseline)`, where the advantage
  `(loss - baseline)` is wrapped in `stop_gradient` so that only the log
  probability contributes to the gradient.

  Args:
    stochastic_tensor: `StochasticTensor` p(x).
    value: `Tensor` x. Samples from p(x).
    loss: `Tensor`.
    baseline: `Tensor` broadcastable to `loss`. If `None`, the advantage is
      just `loss`.
    name: name to prepend ops with.

  Returns:
    `Tensor` `p.log_prob(x) * (loss - b)`. Taking the gradient yields the score
    function estimator.
  """
  with ops.name_scope(name, values=[value, loss, baseline]):
    value = ops.convert_to_tensor(value)
    loss = ops.convert_to_tensor(loss)

    if baseline is None:
      advantage = loss
    else:
      baseline = ops.convert_to_tensor(baseline)
      advantage = loss - baseline

    # Treat the advantage as a constant when differentiating.
    frozen_advantage = array_ops.stop_gradient(advantage)
    log_prob = stochastic_tensor.distribution.log_prob(value)
    return log_prob * frozen_advantage
def get_score_function_with_advantage(advantage_fn=None,
                                      name="ScoreFunctionWithAdvantage"):
  """Score function estimator with advantage function.

  Args:
    advantage_fn: callable that takes the `StochasticTensor` and the
      downstream `loss` and returns a `Tensor` advantage
      (e.g. `loss - baseline`). Required, despite the `None` default kept for
      signature compatibility.
    name: name to prepend ops with.

  Returns:
    Callable score function estimator that takes the `StochasticTensor`, the
    sampled `value`, and the downstream `loss`, and uses the provided advantage.

  Raises:
    TypeError: if `advantage_fn` is not callable (including the default
      `None`).
  """
  # Fail at construction time. Previously a missing `advantage_fn` only
  # surfaced later, inside the returned estimator, as an opaque
  # "'NoneType' object is not callable" TypeError.
  if not callable(advantage_fn):
    raise TypeError(
        "advantage_fn must be a callable taking (stochastic_tensor, loss); "
        "got %r" % (advantage_fn,))

  def score_function_with_advantage(stochastic_tensor, value, loss):
    with ops.name_scope(name, values=[value, loss]):
      advantage = advantage_fn(stochastic_tensor, loss)
      # The advantage is a constant with respect to the gradient.
      advantage = array_ops.stop_gradient(advantage)
      return stochastic_tensor.distribution.log_prob(value) * advantage

  return score_function_with_advantage
def get_score_function_with_constant_baseline(baseline, name="ScoreFunction"):
  """Build a score function estimator that subtracts a fixed baseline.

  Args:
    baseline: `Tensor` to be subtracted from loss.
    name: name to prepend ops with.

  Returns:
    Callable score function estimator that takes the `StochasticTensor`, the
    sampled `value`, and the downstream `loss`, and subtracts the provided
    `baseline` from the `loss`.
  """

  def score_function_with_constant_baseline(stochastic_tensor, value, loss):
    # Thin closure over `score_function` with the captured baseline and name.
    return score_function(
        stochastic_tensor, value, loss, baseline=baseline, name=name)

  return score_function_with_constant_baseline
def get_score_function_with_baseline(baseline_fn=None, name="ScoreFunction"):
  """Build a score function estimator whose baseline comes from a callable.

  Args:
    baseline_fn: callable that takes the `StochasticTensor` and the downstream
      `loss` and returns a `Tensor` baseline to be subtracted from the `loss`.
      If None, defaults to `get_mean_baseline`, which is an EMA of the loss.
    name: name to prepend ops with.

  Returns:
    Callable score function estimator that takes the `StochasticTensor`, the
    sampled `value`, and the downstream `loss`, and subtracts the provided
    `baseline` from the `loss`.
  """
  baseline_fn = get_mean_baseline() if baseline_fn is None else baseline_fn

  def score_function_with_baseline(stochastic_tensor, value, loss):
    with ops.name_scope(name):
      # The baseline is evaluated per call so it can depend on the loss.
      baseline = baseline_fn(stochastic_tensor, loss)
      return score_function(stochastic_tensor, value, loss, baseline=baseline)

  return score_function_with_baseline
def get_mean_baseline(ema_decay=0.99, name=None):
  """Baseline given by an exponential moving average of the mean loss.

  Args:
    ema_decay: decay rate for the ExponentialMovingAverage.
    name: name for variable scope of the ExponentialMovingAverage.

  Returns:
    Callable baseline function that takes the `StochasticTensor` (unused) and
    the downstream `loss`, and returns an EMA of the loss.
  """

  def mean_baseline(_, loss):
    with vs.variable_scope(name, default_name="MeanBaseline"):
      mean_loss = math_ops.reduce_mean(loss)

      averager = training.ExponentialMovingAverage(
          decay=ema_decay, zero_debias=True)
      apply_update = averager.apply([mean_loss])

      with ops.control_dependencies([apply_update]):
        # `identity` creates an op inside this context, and that op is what
        # triggers the update. Without it nothing would be updated.
        return array_ops.identity(averager.average(mean_loss))

  return mean_baseline
def get_vimco_advantage_fn(have_log_loss=False):
  """VIMCO (Variational Inference for Monte Carlo Objectives) baseline.

  Implements the VIMCO baseline from the article of the same name:

  https://arxiv.org/pdf/1602.06725v2.pdf

  Given a `loss` tensor (containing non-negative probabilities or ratios),
  calculates the VIMCO advantage via Eq. 9 of the above paper.

  The tensor `loss` should be shaped `[n, ...]`, with rank at least 1.  The
  first axis is treated as the single sampling dimension and `n` must be at
  least 2.  Specifically, the `StochasticTensor` is assumed to have used the
  `SampleValue(n)` value type with `n > 1`.

  Args:
    have_log_loss: Python `Boolean`.  If `True`, the loss is assumed to be the
      log loss.  If `False` (the default), it is assumed to be a nonnegative
      probability or probability ratio.

  Returns:
    Callable baseline function that takes the `StochasticTensor` (unused) and
    the downstream `loss`, and returns the VIMCO baseline for the loss.
  """

  def vimco_advantage_fn(_, loss, name=None):
    """Internal VIMCO function.

    Args:
      _: ignored `StochasticTensor`.
      loss: The loss `Tensor`.
      name: Python string, the name scope to use.

    Returns:
      The advantage `Tensor`.
    """
    with ops.name_scope(name, "VIMCOAdvantage", values=[loss]):
      loss = ops.convert_to_tensor(loss)

      # Use the static size of axis 0 when known, else the dynamic shape.
      static_n = loss.get_shape()[0].value
      n = math_ops.cast(
          static_n or array_ops.shape(loss)[0], dtype=loss.dtype)

      log_loss = loss if have_log_loss else math_ops.log(loss)

      # L_hat, Eq. (4), computed stably in log-space.
      log_mean = math_ops.reduce_logsumexp(log_loss, [0]) - math_ops.log(n)

      # Broadcasting [a, b, c] against [a, 1, b, c] yields [a, a, b, c] with
      #   pairwise_diff[i, j] = log_loss[j] - log_loss[i]
      # (zero wherever i == j).
      pairwise_diff = log_loss - array_ops.expand_dims(log_loss, [1])

      # Summing over i gives (n - 1) * log_loss[j] - sum_{i != j} log_loss[i];
      # dividing by (n - 1):
      #   leave_one_out_gap[j] =
      #       log_loss[j] - (sum_{i != j} log_loss[i]) / (n - 1)
      leave_one_out_gap = math_ops.reduce_sum(pairwise_diff, [0]) / (n - 1)

      # Subtracting that from log_loss leaves the log of the leave-one-out
      # geometric mean:
      #   geometric_mean[j] = exp((sum_{i != j} log_loss[i]) / (n - 1))
      log_geometric_mean = log_loss - leave_one_out_gap

      ## Equation (9)

      # log-sum-exp over i of -pairwise_diff satisfies
      #   exp(log_ratio_sum[j]) =
      #       1 + sum_{i != j} exp(log_loss[i] - log_loss[j])
      log_ratio_sum = math_ops.reduce_logsumexp(-pairwise_diff, [0])

      # (exp(log_ratio_sum[j]) - 1) * exp(log_loss[j])
      #     = sum_{i != j} exp(log_loss[i]),
      # evaluated in log-space.
      log_sum_of_others = _logexpm1(log_ratio_sum) + log_loss

      # logaddexp of the two local learning signals (Eq. 9), averaged over n.
      stacked_signals = array_ops.stack(
          (log_sum_of_others, log_geometric_mean))
      local_learning_signal = (
          math_ops.reduce_logsumexp(stacked_signals, [0]) - math_ops.log(n))

      return log_mean - local_learning_signal

  return vimco_advantage_fn
def _logexpm1(x):
  """Stably calculate log(exp(x)-1)."""
  with ops.name_scope("logsumexp1"):
    eps = np.finfo(x.dtype.as_numpy_dtype).eps

    def _offset_log(y):
      # The small offset keeps gradient calculations stable for float16,
      # float32, and float64.
      return math_ops.log(y + eps / 1e8)

    is_tiny = math_ops.abs(x) < eps
    # Small-x approximation to log(expm1(x)).
    series_branch = _offset_log(x) + x/2 + x*x/24
    direct_branch = _offset_log(math_ops.exp(x) - 1)
    return array_ops.where(is_tiny, series_branch, direct_branch)
# Public API of this module, computed by `make_all` from the module's name.
# NOTE(review): presumably `make_all` derives the exports from the `@@symbol`
# entries in the module docstring — confirm against `all_util`.
__all__ = make_all(__name__)

View File

@ -1,37 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Support for Stochastic Computation Graphs.
See the @{$python/contrib.bayesflow.stochastic_graph} guide.
@@surrogate_loss
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.contrib.bayesflow.python.ops.stochastic_graph_impl import *
# pylint: enable=wildcard-import
from tensorflow.python.util.all_util import remove_undocumented
# Names passed to `remove_undocumented` as the allowed symbols for this module.
_allowed_symbols = [
    "surrogate_loss"
]
# NOTE(review): presumably strips every module attribute that is neither
# listed above nor documented via `@@` in the docstring — confirm in all_util.
remove_undocumented(__name__, _allowed_symbols)

View File

@ -1,175 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classes and helper functions for Stochastic Computation Graphs.
## Stochastic Computation Graph Helper Functions
@@surrogate_loss
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor_impl
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import tf_logging as logging
def _upstream_stochastic_nodes(tensors):
  """Find, for each tensor, the stochastic tensors it depends on.

  This inverts the mapping produced by `_stochastic_dependencies_map`.

  Args:
    tensors: a list of Tensors.

  Returns:
    A dict that maps the tensors passed in to the `StochasticTensor` objects
    upstream of them.
  """
  upstream = collections.defaultdict(set)
  for stoch_node, dependents in _stochastic_dependencies_map(tensors).items():
    for tensor in dependents:
      upstream[tensor].add(stoch_node)
  return upstream
def _stochastic_dependencies_map(fixed_losses, stochastic_tensors=None):
  """Map stochastic tensors to the fixed losses that depend on them.

  For each loss, the graph is walked backwards through `tensor.op.inputs`.
  Every tensor is expanded at most once per loss, so shared ancestors are not
  re-traversed and cyclic graphs terminate.

  Args:
    fixed_losses: a list of `Tensor`s.
    stochastic_tensors: a list of `StochasticTensor`s to map to fixed losses.
      If `None` (or empty), all `StochasticTensor`s in the graph collection
      will be used.

  Returns:
    A dict `dependencies` that maps `StochasticTensor` objects to subsets of
    `fixed_losses`. An empty dict is returned when there are no stochastic
    tensors to consider.

    If `loss in dependencies[st]`, for some `loss` in `fixed_losses` then there
    is a direct path from `st.value()` to `loss` in the graph.
  """
  stoch_value_collection = stochastic_tensors or ops.get_collection(
      stochastic_tensor_impl.STOCHASTIC_TENSOR_COLLECTION)

  if not stoch_value_collection:
    return {}

  # Lookup from a stochastic tensor's value `Tensor` back to the node itself.
  stoch_value_map = dict(
      (node.value(), node) for node in stoch_value_collection)

  # Step backwards through the graph to see which surrogate losses correspond
  # to which fixed_losses.
  #
  # TODO(ebrevdo): Ensure that fixed_losses and stochastic values are in the
  # same frame.
  stoch_dependencies_map = collections.defaultdict(set)
  for loss in fixed_losses:
    # `visited` guarantees each tensor is expanded once per loss. Without it,
    # tensors reachable along several paths are re-expanded repeatedly and a
    # cycle in the graph would make the walk loop forever.
    visited = set()
    boundary = [loss]
    while boundary:
      edge = boundary.pop()
      if edge in visited:
        continue
      visited.add(edge)
      edge_stoch_node = stoch_value_map.get(edge, None)
      if edge_stoch_node is not None:
        stoch_dependencies_map[edge_stoch_node].add(loss)
      boundary.extend(edge.op.inputs)

  return stoch_dependencies_map
def surrogate_loss(sample_losses,
                   stochastic_tensors=None,
                   name="SurrogateLoss"):
  """Surrogate loss for stochastic graphs.

  Calls `loss` on each `StochasticTensor` upstream of `sample_losses`,
  passing it the (gradient-stopped) sum of the losses that it influenced, and
  adds the resulting terms to the sample losses.

  Note that currently `surrogate_loss` does not work with `StochasticTensor`s
  instantiated in `while_loop`s or other control structures.

  Args:
    sample_losses: a list or tuple of final losses. Each loss should be per
      example in the batch (and possibly per sample); that is, it should have
      dimensionality of 1 or greater. All losses should have the same shape.
    stochastic_tensors: a list of `StochasticTensor`s to add loss terms for.
      If None, defaults to all `StochasticTensor`s in the graph upstream of
      the `Tensor`s in `sample_losses`.
    name: the name with which to prepend created ops.

  Returns:
    `Tensor` loss, which is the sum of `sample_losses` and the
    `loss_fn`s returned by the `StochasticTensor`s.

  Raises:
    TypeError: if `sample_losses` is not a list or tuple, or if its elements
      are not `Tensor`s.
    ValueError: if any loss in `sample_losses` does not have dimensionality 1
      or greater.
  """
  with ops.name_scope(name, values=sample_losses):
    if not isinstance(sample_losses, (list, tuple)):
      raise TypeError("sample_losses must be a list or tuple")

    for candidate in sample_losses:
      if not isinstance(candidate, ops.Tensor):
        raise TypeError("loss is not a Tensor: %s" % candidate)
      rank = candidate.get_shape().ndims
      if rank is None or rank < 1:
        raise ValueError("loss must have dimensionality 1 or greater: %s" %
                         candidate)

    dependencies = _stochastic_dependencies_map(
        sample_losses, stochastic_tensors=stochastic_tensors)
    if not dependencies:
      logging.warn(
          "No collection of Stochastic Tensors found for current graph.")
      return math_ops.add_n(sample_losses)

    # Start from the sample losses, then append one surrogate term for each
    # stochastic tensor that provides one.
    loss_terms = [ops.convert_to_tensor(loss) for loss in sample_losses]
    for stoch_node, influenced in dependencies.items():
      influenced = list(influenced)
      logging.info("Losses influenced by StochasticTensor %s: [%s]",
                   stoch_node.name,
                   ", ".join([loss.name for loss in influenced]))

      # Sum of the downstream losses for this stochastic tensor.
      influenced_total = _add_n_or_sum(influenced)

      # The node sees the influenced loss as a constant.
      surrogate_term = stoch_node.loss(
          array_ops.stop_gradient(influenced_total))
      if surrogate_term is not None:
        loss_terms.append(surrogate_term)

    return _add_n_or_sum(loss_terms)
def _add_n_or_sum(terms):
  """Sum `terms` with `add_n` when possible, else with Python's `sum`.

  `add_n` works for Tensors of the same dtype and shape, so it is used only
  when every term has a fully defined shape compatible with the first term's
  shape and the same dtype as the first term.

  Args:
    terms: non-empty sequence of tensors; the first one is the reference for
      shape and dtype.

  Returns:
    The sum of `terms`.
  """
  reference_shape = terms[0].get_shape()
  reference_dtype = terms[0].dtype

  def _matches_reference(term):
    term_shape = term.get_shape()
    return (term_shape.is_fully_defined() and
            term_shape.is_compatible_with(reference_shape) and
            term.dtype == reference_dtype)

  if all(_matches_reference(term) for term in terms):
    return math_ops.add_n(terms)
  return sum(terms)

View File

@ -1,48 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Support for creating Stochastic Tensors.
See the @{$python/contrib.bayesflow.stochastic_tensor} guide.
@@BaseStochasticTensor
@@StochasticTensor
@@MeanValue
@@SampleValue
@@value_type
@@get_current_value_type
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.contrib.bayesflow.python.ops.stochastic_tensor_impl import *
# pylint: enable=wildcard-import
from tensorflow.python.util.all_util import remove_undocumented
# Names passed to `remove_undocumented` as the allowed symbols for this module.
# Note that "ObservedStochasticTensor" is listed here although it has no `@@`
# entry in the module docstring.
_allowed_symbols = [
    "BaseStochasticTensor",
    "StochasticTensor",
    "ObservedStochasticTensor",
    "MeanValue",
    "SampleValue",
    "value_type",
    "get_current_value_type",
]
# NOTE(review): presumably strips every module attribute that is neither
# listed above nor documented via `@@` in the docstring — confirm in all_util.
remove_undocumented(__name__, _allowed_symbols)

View File

@ -1,477 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classes and helper functions for creating Stochastic Tensors.
`StochasticTensor` objects wrap `Distribution` objects. Their
values may be samples from the underlying distribution, or the distribution
mean (as governed by `value_type`). These objects provide a `loss`
method for use when sampling from a non-reparameterized distribution.
The `loss` method is used in conjunction with `stochastic_graph.surrogate_loss`
to produce a single differentiable loss in stochastic graphs having
both continuous and discrete stochastic nodes.
## Stochastic Tensor Classes
@@BaseStochasticTensor
@@StochasticTensor
## Stochastic Tensor Value Types
@@MeanValue
@@SampleValue
@@value_type
@@get_current_value_type
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import abc
import collections
import contextlib
import threading
import six
from tensorflow.contrib.bayesflow.python.ops import stochastic_gradient_estimators as sge
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.distributions import distribution
STOCHASTIC_TENSOR_COLLECTION = "_stochastic_tensor_collection_"
@six.add_metaclass(abc.ABCMeta)
class BaseStochasticTensor(object):
  """Base Class for Tensor-like objects that emit stochastic values."""

  def __init__(self):
    # Record this instance in the default graph's Stochastic Tensor collection
    # so the surrogate loss calculation can find it later.
    ops.add_to_collection(STOCHASTIC_TENSOR_COLLECTION, self)

  @abc.abstractproperty
  def name(self):
    """Abstract property; concrete subclasses must provide it."""

  @abc.abstractproperty
  def dtype(self):
    """Abstract property; read by `_tensor_conversion_function`."""

  @abc.abstractproperty
  def graph(self):
    """Abstract property; concrete subclasses must provide it."""

  @abc.abstractmethod
  def value(self, name=None):
    """Abstract; its result is what tensor conversion of this object returns."""

  @abc.abstractmethod
  def loss(self, sample_loss):
    """Returns the term to add to the surrogate loss.

    This method is called by `surrogate_loss`.  The input `sample_loss` should
    have already had `stop_gradient` applied to it.  This is because the
    surrogate_loss usually provides a Monte Carlo sample term of the form
    `differentiable_surrogate * sample_loss` where `sample_loss` is considered
    constant with respect to the input for purposes of the gradient.

    Args:
      sample_loss: `Tensor`, sample loss downstream of this `StochasticTensor`.

    Returns:
      Either `None` or a `Tensor`.
    """
    raise NotImplementedError("surrogate_loss not implemented")

  @staticmethod
  def _tensor_conversion_function(v, dtype=None, name=None, as_ref=False):
    """Convert stochastic tensor `v` to a `Tensor` by returning `v.value()`.

    Args:
      v: the stochastic tensor being converted.
      dtype: optional requested dtype; must be compatible with `v.dtype`.
      name: ignored.
      as_ref: must be falsy; ref conversion is not supported.

    Returns:
      `v.value()`.

    Raises:
      ValueError: if `dtype` is incompatible with `v.dtype`, or if `as_ref`
        is set.
    """
    del name  # Unused.
    if dtype and not dtype.is_compatible_with(v.dtype):
      raise ValueError(
          "Incompatible type conversion requested to type '%s' for variable "
          "of type '%s'" % (dtype.name, v.dtype.name))
    if as_ref:
      raise ValueError("%s: Ref type is not supported." % v)
    return v.value()


# Register the converter so instances can be passed where a `Tensor` is
# expected.
# pylint: disable=protected-access
ops.register_tensor_conversion_function(
    BaseStochasticTensor, BaseStochasticTensor._tensor_conversion_function)
# pylint: enable=protected-access
class _StochasticValueType(object):
  """Interface for the ValueType classes.

  This is the base class for MeanValue, SampleValue, and their descendants.
  The three hook methods are no-ops by default.

  Note: this class does not use `abc.ABCMeta`, so the `abstractproperty` on
  `stop_gradient` is not enforced at instantiation time.
  """

  def pushed_above(self, unused_value_type):
    """Hook called when another value type is pushed on top of this one."""

  def popped_above(self, unused_value_type):
    """Hook called when the value type above this one has been popped."""

  def declare_inputs(self, unused_stochastic_tensor, unused_inputs_dict):
    """Hook with no default behavior."""

  @abc.abstractproperty
  def stop_gradient(self):
    """Whether the value should be wrapped in stop_gradient.

    StochasticTensors must respect this property.
    """
class MeanValue(_StochasticValueType):
  """Value type that only carries a `stop_gradient` flag.

  NOTE(review): per the module docstring, this presumably selects the
  distribution mean as the `StochasticTensor` value — confirm in
  `StochasticTensor`.
  """

  def __init__(self, stop_gradient=False):
    """Store the flag.

    Args:
      stop_gradient: value returned by the `stop_gradient` property.
    """
    self._stop_gradient = stop_gradient

  @property
  def stop_gradient(self):
    """The `stop_gradient` flag given at construction."""
    return self._stop_gradient
class SampleValue(_StochasticValueType):
  """Draw samples, possibly adding new outer dimensions along the way.

  This ValueType draws samples from StochasticTensors run within its
  context, increasing the rank according to the requested shape.

  Examples:

  ```python
  mu = tf.zeros((2,3))
  sigma = tf.ones((2, 3))
  with sg.value_type(sg.SampleValue()):
    st = sg.StochasticTensor(
      tf.contrib.distributions.Normal, mu=mu, sigma=sigma)
  # draws 1 sample and does not reshape
  assertEqual(st.value().get_shape(), (2, 3))
  ```

  ```python
  mu = tf.zeros((2,3))
  sigma = tf.ones((2, 3))
  with sg.value_type(sg.SampleValue(4)):
    st = sg.StochasticTensor(
      tf.contrib.distributions.Normal, mu=mu, sigma=sigma)
  # draws 4 samples each with shape (2, 3) and concatenates
  assertEqual(st.value().get_shape(), (4, 2, 3))
  ```
  """

  def __init__(self, shape=(), stop_gradient=False):
    """Sample according to shape.

    For the given StochasticTensor `st` using this value type,
    the shape of `st.value()` will match that of
    `st.distribution.sample(shape)`.

    Args:
      shape: A shape tuple or int32 tensor.  The sample shape.
        Default is a scalar: take one sample and do not change the size.
      stop_gradient: If `True`, StochasticTensors' values are wrapped in
        `stop_gradient`, to avoid backpropagation through.
    """
    self._stop_gradient = stop_gradient
    self._shape = shape

  @property
  def shape(self):
    """The sample shape given at construction."""
    return self._shape

  @property
  def stop_gradient(self):
    """The `stop_gradient` flag given at construction."""
    return self._stop_gradient
# Keeps track of how a StochasticTensor's value should be accessed.
# Used by value_type and get_current_value_type below.
# Keys are thread idents (`threading.current_thread().ident`); each value is a
# list used as a stack of active value types, innermost last.
_STOCHASTIC_VALUE_STACK = collections.defaultdict(list)
@contextlib.contextmanager
def value_type(dist_value_type):
  """Creates a value type context for any StochasticTensor created within.

  Typical usage:

  ```
  with sg.value_type(sg.MeanValue(stop_gradient=True)):
    st = sg.StochasticTensor(tf.contrib.distributions.Normal, mu=mu,
                             sigma=sigma)
  ```

  In the example above, `st.value()` (or equivalently, `tf.identity(st)`) will
  be the mean value of the Normal distribution, i.e., `mu` (possibly
  broadcasted to the shape of `sigma`).  Furthermore, because the `MeanValue`
  was marked with `stop_gradient=True`, this value will have been wrapped
  in a `stop_gradient` call to disable any possible backpropagation.

  The value type is pushed on a per-thread stack for the duration of the
  `with` block and is removed again on exit, including when the block raises.

  Args:
    dist_value_type: An instance of `MeanValue`, `SampleValue`, or
      any other stochastic value type.

  Yields:
    A context for `StochasticTensor` objects that controls the
    value created when they are initialized.

  Raises:
    TypeError: if `dist_value_type` is not an instance of a stochastic value
      type.
  """
  if not isinstance(dist_value_type, _StochasticValueType):
    raise TypeError("dist_value_type must be a Distribution Value Type")
  thread_id = threading.current_thread().ident
  stack = _STOCHASTIC_VALUE_STACK[thread_id]
  if stack:
    # Notify the enclosing value type that a new one is being nested in it.
    stack[-1].pushed_above(dist_value_type)
  stack.append(dist_value_type)
  try:
    yield
  finally:
    # Unwind even if the `with` body raised. Otherwise the stale value type
    # would stay on this thread's stack and apply to every StochasticTensor
    # created afterwards.
    stack.pop()
    if stack:
      stack[-1].popped_above(dist_value_type)
class NoValueTypeSetError(ValueError):
  """Raised by `get_current_value_type` when no value type is active."""
def get_current_value_type():
  """Return the innermost value type active on the calling thread.

  Returns:
    The last entry of the calling thread's value-type stack.

  Raises:
    NoValueTypeSetError: if the calling thread's stack is empty.
  """
  ident = threading.current_thread().ident
  if _STOCHASTIC_VALUE_STACK[ident]:
    return _STOCHASTIC_VALUE_STACK[ident][-1]
  raise NoValueTypeSetError(
      "No value type currently set for this thread (%s).  Did you forget to "
      "wrap 'with stochastic_graph.value_type(...)'?" % ident)
class StochasticTensor(BaseStochasticTensor):
  """StochasticTensor is a BaseStochasticTensor backed by a distribution."""

  def __init__(self,
               dist,
               name="StochasticTensor",
               dist_value_type=None,
               loss_fn=sge.score_function):
    """Construct a `StochasticTensor`.

    `StochasticTensor` is backed by the `dist` distribution and its `value`
    method will return the same value each time it is called. What `value` is
    returned is controlled by the `dist_value_type` (defaults to
    `SampleValue`).

    Some distributions' sample functions are not differentiable (e.g. a sample
    from a discrete distribution like a Bernoulli) and so to differentiate
    wrt parameters upstream of the sample requires a gradient estimator like
    the score function estimator. This is accomplished by passing a
    differentiable `loss_fn` to the `StochasticTensor`, which
    defaults to a function whose derivative is the score function estimator.
    Calling `stochastic_graph.surrogate_loss(final_losses)` will call
    `loss()` on every `StochasticTensor` upstream of final losses.

    `loss()` will return None for `StochasticTensor`s backed by
    reparameterized distributions; it will also return None if the value type
    is `MeanValue` or if `loss_fn=None`.

    Args:
      dist: an instance of `Distribution`.
      name: a name for this `StochasticTensor` and its ops.
      dist_value_type: a `_StochasticValueType`, which will determine what the
          `value` of this `StochasticTensor` will be. If not provided, the
          value type set with the `value_type` context manager will be used,
          falling back to `SampleValue()` when none is active.
      loss_fn: callable that takes
          `(st, st.value(), influenced_loss)`, where
          `st` is this `StochasticTensor`, and returns a `Tensor` loss. By
          default, `loss_fn` is the `score_function`, or more precisely, the
          integral of the score function, such that when the gradient is
          taken, the score function results. See the
          `stochastic_gradient_estimators` module for additional loss
          functions and baselines.

    Raises:
      TypeError: if `dist` is not an instance of `Distribution`.
      TypeError: if `loss_fn` is not `callable`.
    """
    if not isinstance(dist, distribution.Distribution):
      raise TypeError("dist must be an instance of Distribution")
    if dist_value_type is None:
      try:
        self._value_type = get_current_value_type()
      except NoValueTypeSetError:
        self._value_type = SampleValue()
    else:
      # We want to enforce a value type here, but use the value_type()
      # context manager to enforce some error checking.
      with value_type(dist_value_type):
        self._value_type = get_current_value_type()

    if loss_fn is not None and not callable(loss_fn):
      raise TypeError("loss_fn must be callable")
    self._loss_fn = loss_fn

    with ops.name_scope(name) as scope:
      self._name = scope
      self._dist = dist
      self._value = self._create_value()

    super(StochasticTensor, self).__init__()

  @property
  def value_type(self):
    """The value type selected at construction."""
    return self._value_type

  @property
  def distribution(self):
    """The backing distribution passed as `dist`."""
    return self._dist

  def _create_value(self):
    """Create the value Tensor based on the value type.

    Returns:
      The distribution's mean (for `MeanValue`) or a sample (for
      `SampleValue`), wrapped in `stop_gradient` when the value type requests
      it or when a sample comes from a distribution that is not fully
      reparameterized.

    Raises:
      TypeError: if the value type is neither `MeanValue` nor `SampleValue`.
    """
    if isinstance(self._value_type, MeanValue):
      value_tensor = self._dist.mean()
    elif isinstance(self._value_type, SampleValue):
      value_tensor = self._dist.sample(self._value_type.shape)
    else:
      # Format eagerly: passing the value as a second exception argument would
      # leave a literal "%s" in the message.
      raise TypeError("Unrecognized Distribution Value Type: %s" %
                      (self._value_type,))

    if self._value_type.stop_gradient:
      # stop_gradient is being enforced by the value type
      return array_ops.stop_gradient(value_tensor)

    if isinstance(self._value_type, MeanValue):
      return value_tensor  # Using pathwise-derivative for this one.
    if self._dist.reparameterization_type == distribution.FULLY_REPARAMETERIZED:
      return value_tensor  # Using pathwise-derivative for this one.
    else:
      # Will have to perform some variant of score function
      # estimation.  Call stop_gradient on the sampler just in case we
      # may accidentally leak some gradient from it.
      return array_ops.stop_gradient(value_tensor)

  @property
  def name(self):
    """The name scope captured at construction."""
    return self._name

  @property
  def graph(self):
    """The `graph` attribute of the value tensor."""
    return self._value.graph

  @property
  def dtype(self):
    """The dtype of the backing distribution."""
    return self._dist.dtype

  def entropy(self, name="entropy"):
    """Return `entropy(name=name)` of the backing distribution."""
    return self._dist.entropy(name=name)

  def mean(self, name="mean"):
    """Return `mean(name=name)` of the backing distribution."""
    return self._dist.mean(name=name)

  def value(self, name="value"):
    """Return the value tensor created at construction (`name` is unused)."""
    return self._value

  def loss(self, final_loss, name="Loss"):
    """Return a loss based on `final_loss` and the distribution.

    Args:
      final_loss: passed through to `loss_fn` as its third argument.
      name: name scope for the ops created by `loss_fn`.

    Returns:
      `None` if pathwise derivatives are supported, if `loss_fn` was
      explicitly set to `None`, or if the value type is `MeanValue` without
      `stop_gradient`; otherwise `loss_fn(self, self.value(), final_loss)`.

    Raises:
      TypeError: if the value type is neither `MeanValue` nor `SampleValue`.
    """
    if self._loss_fn is None:
      return None

    if (self._dist.reparameterization_type == distribution.FULLY_REPARAMETERIZED
        and not self._value_type.stop_gradient):
      # Can perform pathwise-derivative on this one; no additional loss needed.
      return None

    with ops.name_scope(self.name, values=[final_loss]):
      with ops.name_scope(name):
        if (self._value_type.stop_gradient or
            isinstance(self._value_type, SampleValue)):
          return self._loss_fn(self, self._value, final_loss)
        elif isinstance(self._value_type, MeanValue):
          return None  # MeanValue generally provides its own gradient
        else:
          # Format eagerly; see the note in _create_value.
          raise TypeError("Unrecognized Distribution Value Type: %s" %
                          (self._value_type,))
class ObservedStochasticTensor(StochasticTensor):
  """A `StochasticTensor` whose value is supplied by the caller."""

  # pylint: disable=super-init-not-called
  def __init__(self, dist, value, name=None):
    """Construct an `ObservedStochasticTensor`.

    The tensor is backed by `dist`, but instead of consulting the current
    value type to draw from the distribution it uses `value` directly. The
    trailing dimensions of `value` have to be compatible with the
    distribution's batch shape followed by its event shape, and its dtype has
    to equal the distribution's dtype.

    Args:
      dist: an instance of `Distribution`.
      value: a Tensor (or something convertible to one) holding the
        observation.
      name: a name for this `ObservedStochasticTensor` and its ops.

    Raises:
      TypeError: if `dist` is not an instance of `Distribution`.
      ValueError: if the shape or dtype of `value` does not fit the
        distribution.
    """
    if not isinstance(dist, distribution.Distribution):
      raise TypeError("dist must be an instance of Distribution")
    with ops.name_scope(name, "ObservedStochasticTensor", [value]) as scope:
      self._name = scope
      self._dist = dist
      expected_shape = self._dist.batch_shape.concatenate(
          self._dist.event_shape)
      value = ops.convert_to_tensor(value)
      observed_shape = value.get_shape()

      if not observed_shape.is_compatible_with(expected_shape):
        observed_rank = observed_shape.ndims
        expected_rank = expected_shape.ndims
        if observed_rank < expected_rank:
          raise ValueError(
              "Rank of observed value (%d) must be >= rank of a sample from the"
              " distribution (%d)." % (observed_rank, expected_rank))
        # Extra leading dimensions are allowed; only the trailing ones have to
        # line up with a single sample.
        trailing_shape = observed_shape[(observed_rank - expected_rank):]
        if not trailing_shape.is_compatible_with(expected_shape):
          raise ValueError(
              "Shape of observed value %s is incompatible with the shape of a "
              "sample from the distribution %s." % (observed_shape,
                                                    expected_shape))
      if value.dtype != self._dist.dtype:
        raise ValueError("Type of observed value (%s) does not match type of "
                         "distribution (%s)." % (value.dtype, self._dist.dtype))
      self._value = array_ops.identity(value)
    # pylint: disable=non-parent-init-called
    BaseStochasticTensor.__init__(self)

  def loss(self, final_loss, name=None):
    """Always return `None`; both arguments are ignored."""
    return None
# Names exported by `from <this module> import *`.
__all__ = [
"BaseStochasticTensor",
"StochasticTensor",
"ObservedStochasticTensor",
"MeanValue",
"SampleValue",
"value_type",
"get_current_value_type",
]

View File

@ -1,151 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Custom `get_variable` for stochastic variables.
@@get_stochastic_variable
@@make_stochastic_variable_getter
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor as st
from tensorflow.contrib.bayesflow.python.ops import variational_inference as vi
def get_stochastic_variable(getter,
                            name,
                            shape=None,
                            dist_cls=None,
                            dist_kwargs=None,
                            param_initializers=None,
                            prior=None,
                            **kwargs):
  """Custom variable getter for stochastic variables.

  `get_stochastic_variable` will create variables backing the parameters of a
  distribution, defined by `dist_cls`, and return a `StochasticTensor` which
  represents a sample from the backing distribution.

  Meant to be passed as the `custom_getter` to a `variable_scope`. Use
  `make_stochastic_variable_getter` to partially apply distribution-related
  args.

  Usage:

  ```python

  sv = tf.contrib.bayesflow.stochastic_variables
  dist = tf.contrib.distributions

  with tf.variable_scope('my_scope',
                         custom_getter=sv.make_stochastic_variable_getter(
                             dist_cls=dist.NormalWithSoftplusSigma,
                             param_initializers={
                               "sigma": lambda shape, dtype, pi: (
                                   tf.constant(0.5, dtype=dtype, shape=shape))
                             })):
    v = tf.get_variable('my_var', (10, 20))
  ```

  `v` is a `StochasticTensor`, which is a sample from a backing
  `NormalWithSoftplusSigma` distribution. Underneath, 2 variables have been
  created: `my_var_mu` and `my_var_sigma`. `my_var_sigma` has been appropriately
  constrained to be positive by the `NormalWithSoftplusSigma` constructor, and
  initialized to a value of 0.5, which results in a sigma of ~1 after the
  softplus. The sample will have shape `(10, 20)`.

  Args:
    getter: original variable getter.
    name: prefix for variable(s) backing distribution parameters.
    shape: shape of the sample from the distribution (i.e. shape of the
        returned `StochasticTensor`).
    dist_cls: subclass of `Distribution` that implements `param_shapes`. Should
        accept unconstrained parameters (e.g. `NormalWithSoftplusSigma` accepts
        real-valued `sigma` and constrains it to be positive with `softplus`).
    dist_kwargs: `dict` of kwargs to be forwarded to `dist_cls`. The dict is
        copied, never modified.
    param_initializers: `dict` from parameter name to initializer (see
        `get_variable` for initializer docs). Will override `initializer` in
        `kwargs`. `param_initializers` may contain initializers for only some
        of the parameters. Those parameters that do not contain entries will be
        initialized by `kwargs['initializer']`, if provided; otherwise, the
        default initialization of `getter` will be used.
    prior: instance of `Distribution` or a callable
        `(TensorShape, dtype) => Distribution`. If provided, will be registered
        as the prior for the `StochasticTensor` using
        `variational_inference.register_prior`.
    **kwargs: kwargs forwarded to `getter`.

  Returns:
    `StochasticTensor`, which represents a sample from the backing distribution.
  """
  param_initializers = param_initializers or {}
  param_shapes = {}
  if shape is not None:
    param_shapes = dist_cls.param_static_shapes(shape)

  param_names = set(param_shapes) | set(param_initializers)

  # Take the shared fallback initializer out of kwargs exactly once. Popping
  # it inside the loop would hand it to the first parameter only and give
  # every later parameter `None`, contradicting the documented contract.
  original_initializer = kwargs.pop('initializer', None)

  params = {}
  for param_name in param_names:
    # For each parameter, its param_initializer is used, if provided.
    # Otherwise, kwargs['initializer'] is used. If neither were provided, the
    # default variable initialization in getter will be used (i.e. getter will
    # be passed initializer=None).
    param_initializer = param_initializers.get(param_name, None)
    if param_initializer is None:
      param_initializer = original_initializer

    # A non-callable initializer (e.g. a constant value) carries its own
    # shape, so no explicit shape is passed alongside it.
    if callable(param_initializer) or param_initializer is None:
      param_shape = param_shapes.get(param_name, None)
    else:
      param_shape = None

    params[param_name] = getter(
        name + '_' + param_name,
        shape=param_shape,
        initializer=param_initializer,
        **kwargs)

  # Work on a copy: the dict supplied by the caller may be shared between
  # calls (e.g. through `make_stochastic_variable_getter`) and must not
  # accumulate the variables created here.
  dist_kwargs = dict(dist_kwargs or {})
  dist_kwargs.update(params)
  sample = st.StochasticTensor(dist_cls(**dist_kwargs))

  if prior is not None:
    if callable(prior):
      sample_value = sample.value()
      sample_value.get_shape().assert_is_fully_defined()
      prior = prior(sample_value.get_shape(), sample_value.dtype)

    vi.register_prior(sample, prior)

  return sample
def make_stochastic_variable_getter(dist_cls,
                                    dist_kwargs=None,
                                    param_initializers=None,
                                    prior=None):
  """Return `get_stochastic_variable` with the distribution args bound.

  Args:
    dist_cls: bound as the `dist_cls` keyword.
    dist_kwargs: bound as the `dist_kwargs` keyword.
    param_initializers: bound as the `param_initializers` keyword.
    prior: bound as the `prior` keyword.

  Returns:
    A `functools.partial` of `get_stochastic_variable`.
  """
  bound_keywords = {
      "dist_cls": dist_cls,
      "dist_kwargs": dist_kwargs,
      "param_initializers": param_initializers,
      "prior": prior,
  }
  return functools.partial(get_stochastic_variable, **bound_keywords)

View File

@ -1,34 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Variational inference.
See the ${@python/contrib.bayesflow.variational_inference} guide.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.contrib.bayesflow.python.ops.variational_inference_impl import *
# pylint: enable=wildcard-import
from tensorflow.python.util.all_util import remove_undocumented
# Names this module intends to expose; the list is handed to
# `remove_undocumented` together with this module's name.
# NOTE(review): presumably every other attribute is stripped from the module
# namespace -- confirm against `all_util.remove_undocumented`.
_allowed_symbols = [
"elbo", "elbo_with_log_joint", "ELBOForms", "register_prior"
]
remove_undocumented(__name__, _allowed_symbols)

View File

@ -1,327 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Variational inference.
See the ${@python/contrib.bayesflow.variational_inference} guide.
@@elbo
@@elbo_with_log_joint
@@ELBOForms
@@register_prior
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.bayesflow.python.ops import stochastic_graph_impl as sg
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor_impl as st
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import kullback_leibler
from tensorflow.python.platform import tf_logging as logging
# Key of the collection in which `register_prior` stores its
# `(variational, prior)` pairs; `_find_variational_and_priors` reads it back.
VI_PRIORS = "__vi_priors__"
def register_prior(variational, prior):
  """Associate a variational `StochasticTensor` with a `Distribution` prior.

  This is a helper function used in conjunction with `elbo` that allows users
  to specify the mapping between variational distributions and their priors
  without having to pass in `variational_with_prior` explicitly.

  The `(variational, prior)` pair is appended to the `VI_PRIORS` collection.

  Args:
    variational: `StochasticTensor` q(Z). Approximating distribution.
    prior: `Distribution` p(Z). Prior distribution.

  Returns:
    None

  Raises:
    TypeError: if variational is not a `StochasticTensor` or `prior` is not
      a `Distribution`.
  """
  if not isinstance(variational, st.StochasticTensor):
    raise TypeError("variational must be a StochasticTensor")
  if not isinstance(prior, distribution.Distribution):
    raise TypeError("prior must be a Distribution")
  ops.add_to_collection(VI_PRIORS, (variational, prior))
class _ELBOForm(object):
pass
class ELBOForms(object):
  """Constants that select how `elbo` is calculated.

  `analytic_kl` uses the analytic KL divergence between the
  variational distribution(s) and the prior(s).

  `analytic_entropy` uses the analytic entropy of the variational
  distribution(s).

  `sample` uses the sample KL, or the sample entropy if the joint is provided.

  See `elbo` for what is used with `default`.
  """

  # Four distinct marker instances, one per form.
  default = _ELBOForm()
  analytic_kl = _ELBOForm()
  analytic_entropy = _ELBOForm()
  sample = _ELBOForm()

  @staticmethod
  def check_form(form):
    """Validate `form`.

    Args:
      form: the value to check.

    Raises:
      TypeError: if `form` is not one of the four constants of this class.
    """
    known_forms = {
        ELBOForms.default, ELBOForms.analytic_kl, ELBOForms.analytic_entropy,
        ELBOForms.sample
    }
    if form in known_forms:
      return
    raise TypeError("form must be an ELBOForms constant")
def elbo(log_likelihood,
         variational_with_prior=None,
         keep_batch_dim=True,
         form=None,
         name="ELBO"):
  r"""Evidence Lower BOund. `log p(x) >= ELBO`.

  Objective to optimize when inferring hidden variables by variational
  inference.

  Intended for use together with `StochasticTensor`: build the inference
  network with `StochasticTensor`s as latent variables, then the generative
  network. At minimum `elbo` needs `p(x|Z)`; every `StochasticTensor` upstream
  of `p(x|Z)` is taken to be a variational distribution. Register a
  `Distribution` prior for each of them with `register_prior`, or pass
  `variational_with_prior` listing all variational distributions with their
  priors.

  Mathematical details:

  ```
  log p(x) =  log \int p(x, Z) dZ
           =  log \int \frac {q(Z)p(x, Z)}{q(Z)} dZ
           =  log E_q[\frac {p(x, Z)}{q(Z)}]
           >= E_q[log \frac {p(x, Z)}{q(Z)}] = L[q; p, x]  # ELBO

  L[q; p, x] = E_q[log p(x|Z)p(Z)] - E_q[log q(Z)]
             = E_q[log p(x|Z)p(Z)] + H[q]           (1)
             = E_q[log p(x|Z)] - KL(q || p)         (2)

  H - Entropy
  KL - Kullback-Leibler divergence
  ```

  See section 2.2 of Stochastic Variational Inference by Hoffman et al. for
  more, including the ELBO's equivalence to minimizing `KL(q(Z)||p(Z|x))`
  in the fully Bayesian setting. https://arxiv.org/pdf/1206.7051.pdf.

  `form` selects the form of the ELBO. With `form=ELBOForms.default` the
  following are tried in order of preference: analytic KL, analytic entropy,
  sampling.

  Several entries in the `variational_with_prior` dict imply a factorization,
  e.g. `q(Z) = q(z1)q(z2)q(z3)`.

  Args:
    log_likelihood: `Tensor` log p(x|Z).
    variational_with_prior: dict from `StochasticTensor` q(Z) to
      `Distribution` p(Z). If `None`, defaults to all `StochasticTensor`
      objects upstream of `log_likelihood` with priors registered with
      `register_prior`.
    keep_batch_dim: bool. Whether to keep the batch dimension when summing
      entropy/KL term. When the sample is per data point, this should be True;
      otherwise (e.g. in a Bayesian NN), this should be False.
    form: ELBOForms constant. Controls how the ELBO is computed. Defaults to
      ELBOForms.default.
    name: name to prefix ops with.

  Returns:
    `Tensor` ELBO of the same type and shape as `log_likelihood`.

  Raises:
    TypeError: if variationals in `variational_with_prior` are not
      `StochasticTensor`s or if priors are not `Distribution`s.
    TypeError: if form is not a valid ELBOForms constant.
    ValueError: if `variational_with_prior` is None and there are no
      `StochasticTensor`s upstream of `log_likelihood`.
    ValueError: if any variational does not have a prior passed or registered.
  """
  form = ELBOForms.default if form is None else form
  with ops.name_scope(name):
    likelihood_tensor = ops.convert_to_tensor(log_likelihood)
    q_to_p = _find_variational_and_priors(likelihood_tensor,
                                          variational_with_prior)
    # No joint is available on this path, hence the explicit None.
    return _elbo(form, log_likelihood, None, q_to_p, keep_batch_dim)
def elbo_with_log_joint(log_joint,
                        variational=None,
                        keep_batch_dim=True,
                        form=None,
                        name="ELBO"):
  """Evidence Lower BOund. `log p(x) >= ELBO`.

  Variant of `elbo` for models that have computed `p(x,Z)` rather than
  `p(x|Z)`. See `elbo` for further details.

  Since only the joint is given, analytic KL cannot be used.

  Args:
    log_joint: `Tensor` log p(x, Z).
    variational: list of `StochasticTensor` q(Z). If `None`, defaults to all
      `StochasticTensor` objects upstream of `log_joint`.
    keep_batch_dim: bool. Whether to keep the batch dimension when summing
      entropy term. When the sample is per data point, this should be True;
      otherwise (e.g. in a Bayesian NN), this should be False.
    form: ELBOForms constant. Controls how the ELBO is computed. Defaults to
      ELBOForms.default.
    name: name to prefix ops with.

  Returns:
    `Tensor` ELBO of the same type and shape as `log_joint`.

  Raises:
    TypeError: if variationals in `variational` are not `StochasticTensor`s.
    TypeError: if form is not a valid ELBOForms constant.
    ValueError: if `variational` is None and there are no `StochasticTensor`s
      upstream of `log_joint`.
    ValueError: if form is ELBOForms.analytic_kl.
  """
  form = ELBOForms.default if form is None else form
  if form == ELBOForms.analytic_kl:
    raise ValueError("ELBOForms.analytic_kl is not available when using "
                     "elbo_with_log_joint. Use elbo or a different form.")

  with ops.name_scope(name):
    joint_tensor = ops.convert_to_tensor(log_joint)

    if variational is None:
      q_without_prior = None
    else:
      # Priors are not needed on this path, so each variational maps to None.
      no_priors = [None] * len(variational)
      q_without_prior = dict(zip(variational, no_priors))

    q_to_p = _find_variational_and_priors(
        joint_tensor, q_without_prior, require_prior=False)
    return _elbo(form, None, log_joint, q_to_p, keep_batch_dim)
def _elbo(form, log_likelihood, log_joint, variational_with_prior,
          keep_batch_dim):
  """Internal implementation of ELBO. Users should use `elbo`.

  For every variational distribution the first applicable option is used:

    1. Analytic KL: log_likelihood - KL(q||p)
    2. Analytic entropy: log_likelihood + log p(Z) + H[q], or log_joint + H[q]
    3. Sample: log_likelihood + log p(Z) - log q(Z), or log_joint - log q(Z)

  Args:
    form: ELBOForms constant. Controls how the ELBO is computed.
    log_likelihood: `Tensor` log p(x|Z).
    log_joint: `Tensor` log p(x, Z).
    variational_with_prior: `dict<StochasticTensor, Distribution>`, variational
      distributions to prior distributions.
    keep_batch_dim: bool. Whether to keep the batch dimension when reducing
      the entropy/KL.

  Returns:
    ELBO `Tensor` with same shape and dtype as `log_likelihood`/`log_joint`.
  """
  ELBOForms.check_form(form)

  def _reduce(val):
    return val if keep_batch_dim else math_ops.reduce_sum(val)

  # Which strategies the requested form permits; none of this depends on the
  # individual variational distribution.
  kl_allowed = (log_joint is None and
                form in {ELBOForms.default, ELBOForms.analytic_kl})
  entropy_allowed = form in {ELBOForms.default, ELBOForms.analytic_entropy}
  sampling_allowed = form in {ELBOForms.default, ELBOForms.sample}

  kl_terms, entropy_terms, prior_terms = [], [], []

  triples = [(qz.distribution, qz.value(), pz)
             for qz, pz in variational_with_prior.items()]
  for q, z, p in triples:
    # 1. Analytic KL.
    kl = None
    if kl_allowed:
      try:
        kl = kullback_leibler.kl_divergence(q, p)
        logging.info("Using analytic KL between q:%s, p:%s", q, p)
      except NotImplementedError:
        # Only fatal when the caller explicitly demanded this form.
        if form == ELBOForms.analytic_kl:
          raise
    if kl is not None:
      kl_terms.append(-1. * _reduce(kl))
      continue

    # 2. Analytic entropy.
    entropy = None
    if entropy_allowed:
      try:
        entropy = q.entropy()
        logging.info("Using analytic entropy for q:%s", q)
      except NotImplementedError:
        if form == ELBOForms.analytic_entropy:
          raise
    if entropy is not None:
      entropy_terms.append(_reduce(entropy))
      if log_likelihood is not None:
        # The likelihood alone lacks log p(Z), so add it explicitly.
        prior = p.log_prob(z)
        prior_terms.append(_reduce(prior))
      continue

    # 3. Sample estimate.
    if sampling_allowed:
      entropy = -q.log_prob(z)
      entropy_terms.append(_reduce(entropy))
      if log_likelihood is not None:
        prior = p.log_prob(z)
        prior_terms.append(_reduce(prior))

  first_term = log_likelihood if log_joint is None else log_joint
  return sum([first_term] + kl_terms + entropy_terms + prior_terms)
def _find_variational_and_priors(model,
                                 variational_with_prior,
                                 require_prior=True):
  """Find upstream StochasticTensors and match with registered priors.

  Args:
    model: `Tensor` whose upstream stochastic nodes are looked up when
      `variational_with_prior` is `None`.
    variational_with_prior: dict from `StochasticTensor` to `Distribution`
      (or `None`), or `None` to build the dict from the upstream stochastic
      nodes of `model` and the priors registered in the `VI_PRIORS`
      collection.
    require_prior: if `True`, every discovered upstream node must have a
      registered, non-`None` prior. Only applied to discovered nodes, not to
      a dict supplied by the caller.

  Returns:
    The dict from variational `StochasticTensor` to prior.

  Raises:
    ValueError: if no upstream stochastic nodes are found, or if
      `require_prior` is set and a discovered node has no prior.
    TypeError: if a key is not a `StochasticTensor` or a value is neither
      `None` nor a `Distribution`.
  """
  if variational_with_prior is None:
    # pylint: disable=protected-access
    upstreams = sg._upstream_stochastic_nodes([model])
    # pylint: enable=protected-access
    upstreams = list(upstreams[model])
    if not upstreams:
      # Format eagerly: a second positional argument to the exception is not
      # interpolated and would leave a literal "%s" in the message.
      raise ValueError("No upstream stochastic nodes found for tensor: %s" %
                       (model,))
    prior_map = dict(ops.get_collection(VI_PRIORS))
    variational_with_prior = {}
    for q in upstreams:
      if require_prior and (q not in prior_map or prior_map[q] is None):
        raise ValueError("No prior specified for StochasticTensor: %s" % (q,))
      variational_with_prior[q] = prior_map.get(q)

  if not all(
      isinstance(q, st.StochasticTensor) for q in variational_with_prior):
    raise TypeError("variationals must be StochasticTensors")
  if not all(
      p is None or isinstance(p, distribution.Distribution)
      for p in variational_with_prior.values()):
    raise TypeError("priors must be Distribution objects")

  return variational_with_prior

View File

@ -2,12 +2,15 @@
# Contains ops for statistical distributions (with pdf, cdf, sample, etc...).
# APIs here are meant to evolve over time.
package(default_visibility = [
"//learning/brain/contrib/bayesflow:__subpackages__",
"//tensorflow:__subpackages__",
])
licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
package(default_visibility = ["//tensorflow:__subpackages__"])
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
py_library(

View File

@ -1,47 +1 @@
# BayesFlow Entropy (contrib)
[TOC]
Entropy Ops.
## Background
Common Shannon entropy, the Evidence Lower BOund (ELBO), KL divergence, and more
all have information theoretic use and interpretations. They are also often
used in variational inference. This library brings together `Ops` for
estimating them, e.g. using Monte Carlo expectations.
## Examples
Example of fitting a variational posterior with the ELBO.
```python
# We start by assuming knowledge of the log of a joint density p(z, x) over
# latent variable z and fixed measurement x. Since x is fixed, the Python
# function does not take x as an argument.
def log_joint(z):
theta = tf.Variable(0.) # Trainable variable that helps define log_joint.
...
# Next, define a Normal distribution with trainable parameters.
q = distributions.Normal(mu=tf.Variable(0.), sigma=tf.Variable(1.))
# Now, define a loss function (negative ELBO) that, when minimized, will adjust
# mu, sigma, and theta, increasing the ELBO, which we hope will both reduce the
# KL divergence between q(z) and p(z | x), and increase p(x). Note that we
# cannot guarantee both, but in general we expect both to happen.
elbo = entropy.elbo_ratio(log_joint, q, n=10)
loss = -elbo
# Minimize the loss
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
tf.global_variables_initializer().run()
for step in range(100):
train_op.run()
```
## Ops
* @{tf.contrib.bayesflow.entropy.elbo_ratio}
* @{tf.contrib.bayesflow.entropy.entropy_shannon}
* @{tf.contrib.bayesflow.entropy.renyi_ratio}
* @{tf.contrib.bayesflow.entropy.renyi_alpha}

View File

@ -1,8 +1 @@
# BayesFlow Stochastic Graph (contrib)
[TOC]
Classes and helper functions for Stochastic Computation Graphs.
## Stochastic Computation Graph Helper Functions
* @{tf.contrib.bayesflow.stochastic_graph.surrogate_loss}

View File

@ -1,24 +1,3 @@
# BayesFlow Stochastic Tensors (contrib)
[TOC]
Classes and helper functions for creating Stochastic Tensors.
`StochasticTensor` objects wrap `Distribution` objects. Their
values may be samples from the underlying distribution, or the distribution
mean (as governed by `value_type`). These objects provide a `loss`
method for use when sampling from a non-reparameterized distribution.
The `loss` method is used in conjunction with `stochastic_graph.surrogate_loss`
to produce a single differentiable loss in stochastic graphs having
both continuous and discrete stochastic nodes.
## Stochastic Tensor Classes
* @{tf.contrib.bayesflow.stochastic_tensor.BaseStochasticTensor}
* @{tf.contrib.bayesflow.stochastic_tensor.StochasticTensor}
## Stochastic Tensor Value Types
* @{tf.contrib.bayesflow.stochastic_tensor.MeanValue}
* @{tf.contrib.bayesflow.stochastic_tensor.SampleValue}
* @{tf.contrib.bayesflow.stochastic_tensor.value_type}
* @{tf.contrib.bayesflow.stochastic_tensor.get_current_value_type}

View File

@ -2,10 +2,3 @@
[TOC]
Variational inference.
## Ops
* @{tf.contrib.bayesflow.variational_inference.elbo}
* @{tf.contrib.bayesflow.variational_inference.elbo_with_log_joint}
* @{tf.contrib.bayesflow.variational_inference.ELBOForms}
* @{tf.contrib.bayesflow.variational_inference.register_prior}