Avoid uncollectable cycles with a separate deleter object for resources.

PiperOrigin-RevId: 173972515
This commit is contained in:
Allen Lavoie 2017-10-30 17:05:22 -07:00 committed by TensorFlower Gardener
parent 73fdaf0b56
commit 309e340619
6 changed files with 75 additions and 49 deletions

View File

@ -80,14 +80,11 @@ class Iterator(object):
output_types=self._flat_output_types,
output_shapes=self._flat_output_shapes)
gen_dataset_ops.make_iterator(ds_variant, self._resource)
# Delete the resource when this object is deleted
self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
handle=self._resource, handle_device="/device:CPU:0")
self._device = context.context().device_name
def __del__(self):
if self._resource is not None:
with ops.device("/device:CPU:0"), context.eager_mode():
resource_variable_ops.destroy_resource_op(self._resource)
self._resource = None
def __iter__(self):
return self

View File

@ -114,11 +114,9 @@ class SummaryWriter(object):
self._resource = gen_summary_ops.summary_writer(shared_name=self._name)
gen_summary_ops.create_summary_file_writer(
self._resource, logdir, max_queue, flush_secs, filename_suffix)
def __del__(self):
if self._resource:
resource_variable_ops.destroy_resource_op(self._resource)
self._resource = None
# Delete the resource when this object is deleted
self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
handle=self._resource, handle_device=self._CPU_DEVICE)
def step(self):
"""Increment the global step counter of this SummaryWriter instance."""

View File

@ -92,10 +92,9 @@ class SummaryWriter(object):
def __init__(self, resource):
self._resource = resource
def __del__(self):
if context.in_eager_mode():
resource_variable_ops.destroy_resource_op(self._resource)
self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
handle=self._resource, handle_device="cpu:0")
def set_as_default(self):
context.context().summary_writer_resource = self._resource

View File

@ -17,6 +17,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gc
import numpy as np
from tensorflow.python.eager import context
@ -38,6 +40,12 @@ from tensorflow.python.platform import test
class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
def tearDown(self):
gc.collect()
# This will only contain uncollectable garbage, i.e. reference cycles
# involving objects with __del__ defined.
self.assertEqual(0, len(gc.garbage))
def testHandleDtypeShapeMatch(self):
with self.test_session():
handle = resource_variable_ops.var_handle_op(dtype=dtypes.int32, shape=[])
@ -477,10 +485,11 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
with context.eager_mode():
var = resource_variable_ops.ResourceVariable(initial_value=1.0,
name="var8")
var.__del__()
var_handle = var._handle
del var
with self.assertRaisesRegexp(errors.NotFoundError,
r"Resource .* does not exist."):
resource_variable_ops.destroy_resource_op(var._handle,
resource_variable_ops.destroy_resource_op(var_handle,
ignore_lookup_error=False)
def testScatterUpdate(self):

View File

@ -18,6 +18,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gc
import numpy
from tensorflow.python.eager import context
@ -39,6 +41,12 @@ from tensorflow.python.platform import test
class VariableScopeTest(test.TestCase):
def tearDown(self):
gc.collect()
# This will only contain uncollectable garbage, i.e. reference cycles
# involving objects with __del__ defined.
self.assertEqual(0, len(gc.garbage))
def testGetVar(self):
vs = variable_scope._get_default_variable_store()
v = vs.get_variable("v", [1])

View File

@ -77,6 +77,45 @@ def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode):
return handle
class EagerResourceDeleter(object):
"""An object which cleans up a resource handle.
An alternative to defining a __del__ method on an object. The intended use is
that ResourceVariables or other objects with resource handles will maintain a
single reference to this object. When the parent object is collected, this
object will be too. Even if the parent object is part of a reference cycle,
the cycle will be collectable.
"""
def __init__(self, handle, handle_device):
self._handle = handle
self._handle_device = handle_device
def __del__(self):
# Resources follow object-identity when executing eagerly, so it is safe to
# delete the resource we have a handle to. Each Graph has a unique container
# name, which prevents resource sharing.
try:
# This resource was created in eager mode. However, this destructor may be
# running in graph mode (especially during unit tests). To clean up
# successfully, we switch back into eager mode temporarily.
with context.eager_mode():
with ops.device(self._handle_device):
gen_resource_variable_ops.destroy_resource_op(
self._handle, ignore_lookup_error=True)
except TypeError:
# Suppress some exceptions, mainly for the case when we're running on
# module deletion. Things that can go wrong include the context module
# already being unloaded, self._handle._handle_data no longer being
# valid, and so on. Printing warnings in these cases is silly
# (exceptions raised from __del__ are printed as warnings to stderr).
pass # 'NoneType' object is not callable when the handle has been
# partially unloaded.
except AttributeError:
pass # 'NoneType' object has no attribute 'eager_mode' when context has
# been unloaded. Will catch other module unloads as well.
def shape_safe_assign_variable_handle(handle, shape, value, name=None):
"""Helper that checks shape compatibility and assigns variable."""
value_tensor = ops.convert_to_tensor(value)
@ -415,6 +454,15 @@ class ResourceVariable(variables.Variable):
ops.add_to_collections(collections, self)
elif ops.GraphKeys.GLOBAL_STEP in collections:
ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self)
if not self._in_graph_mode:
# After the handle has been created, set up a way to clean it up when
# executing eagerly. We'll hold the only reference to the deleter, so that
# when this object is garbage collected the deleter will be too. This
# means ResourceVariables can be part of reference cycles without those
# cycles being uncollectable, and means that no __del__ will be defined at
# all in graph mode.
self._handle_deleter = EagerResourceDeleter(
handle=self._handle, handle_device=self._handle_device)
def _init_from_proto(self, variable_def, import_scope=None):
"""Initializes from `VariableDef` proto."""
@ -454,39 +502,6 @@ class ResourceVariable(variables.Variable):
self._constraint = None
# LINT.ThenChange(//tensorflow/python/eager/graph_callable.py)
def __del__(self):
if not self._in_graph_mode:
# There is only one ResourceVariable object for each underlying resource
# (cached in the Graph's VariableStore when created with get_variable), so
# it is safe to delete the resource we have a handle to. Each Graph has a
# unique container name in Eager, which prevents resource sharing.
#
# The Graph's VariableStore contains strong references to ResourceVariable
# objects created with get_variable, so this destructor will only be
# callled once the Graph is garbage collected for those objects. However,
# explicitly created ResourceVariables (e.g. through tfe.Variable) may be
# collected earlier.
try:
# We have checked that this ResourceVariable was created in Eager
# mode. However, this destructor may be running in graph mode
# (especially during unit tests). To clean up successfully, we switch
# back into Eager temporarily.
with context.eager_mode():
with ops.device(self._handle_device):
gen_resource_variable_ops.destroy_resource_op(
self._handle, ignore_lookup_error=True)
except TypeError:
# Suppress some exceptions, mainly for the case when we're running on
# module deletion. Things that can go wrong include the context module
# already being unloaded, self._handle._handle_data no longer being
# valid, and so on. Printing warnings in these cases is silly
# (exceptions raised from __del__ are printed as warnings to stderr).
pass # 'NoneType' object is not callable when the handle has been
# partially unloaded.
except AttributeError:
pass # 'NoneType' object has no attribute 'eager_mode' when context has
# been unloaded. Will catch other module unloads as well.
def __nonzero__(self):
return self.__bool__()