Avoid uncollectable cycles with a separate deleter object for resources.

PiperOrigin-RevId: 173972515
2025-12-07 12:20:24 +01:00 · 2017-10-30 17:05:22 -07:00 · 2017-10-30 17:05:22 -07:00 · 309e340619
commit 309e340619
parent 73fdaf0b56
6 changed files with 75 additions and 49 deletions
--- a/tensorflow/contrib/eager/python/datasets.py
+++ b/tensorflow/contrib/eager/python/datasets.py
@ -80,14 +80,11 @@ class Iterator(object):
          output_types=self._flat_output_types,
          output_shapes=self._flat_output_shapes)
      gen_dataset_ops.make_iterator(ds_variant, self._resource)
      # Delete the resource when this object is deleted
      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
          handle=self._resource, handle_device="/device:CPU:0")
    self._device = context.context().device_name
  def __del__(self):
    if self._resource is not None:
      with ops.device("/device:CPU:0"), context.eager_mode():
        resource_variable_ops.destroy_resource_op(self._resource)
    self._resource = None
  def __iter__(self):
    return self
--- a/tensorflow/contrib/eager/python/summary_writer.py
+++ b/tensorflow/contrib/eager/python/summary_writer.py
@ -114,11 +114,9 @@ class SummaryWriter(object):
      self._resource = gen_summary_ops.summary_writer(shared_name=self._name)
      gen_summary_ops.create_summary_file_writer(
          self._resource, logdir, max_queue, flush_secs, filename_suffix)
-
+      # Delete the resource when this object is deleted
-  def __del__(self):
+      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
-    if self._resource:
+          handle=self._resource, handle_device=self._CPU_DEVICE)
      resource_variable_ops.destroy_resource_op(self._resource)
      self._resource = None
  def step(self):
    """Increment the global step counter of this SummaryWriter instance."""
--- a/tensorflow/contrib/summary/summary_ops.py
+++ b/tensorflow/contrib/summary/summary_ops.py
@ -92,10 +92,9 @@ class SummaryWriter(object):
  def __init__(self, resource):
    self._resource = resource
  def __del__(self):
    if context.in_eager_mode():
-      resource_variable_ops.destroy_resource_op(self._resource)
+      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
          handle=self._resource, handle_device="cpu:0")
  def set_as_default(self):
    context.context().summary_writer_resource = self._resource
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@ -17,6 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import gc
 import numpy as np
 from tensorflow.python.eager import context
@ -38,6 +40,12 @@ from tensorflow.python.platform import test
 class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
  def tearDown(self):
    gc.collect()
    # This will only contain uncollectable garbage, i.e. reference cycles
    # involving objects with __del__ defined.
    self.assertEqual(0, len(gc.garbage))
  def testHandleDtypeShapeMatch(self):
    with self.test_session():
      handle = resource_variable_ops.var_handle_op(dtype=dtypes.int32, shape=[])
@ -477,10 +485,11 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
    with context.eager_mode():
      var = resource_variable_ops.ResourceVariable(initial_value=1.0,
                                                   name="var8")
-      var.__del__()
+      var_handle = var._handle
      del var
      with self.assertRaisesRegexp(errors.NotFoundError,
                                   r"Resource .* does not exist."):
-        resource_variable_ops.destroy_resource_op(var._handle,
+        resource_variable_ops.destroy_resource_op(var_handle,
                                                  ignore_lookup_error=False)
  def testScatterUpdate(self):
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import gc
 import numpy
 from tensorflow.python.eager import context
@ -39,6 +41,12 @@ from tensorflow.python.platform import test
 class VariableScopeTest(test.TestCase):
  def tearDown(self):
    gc.collect()
    # This will only contain uncollectable garbage, i.e. reference cycles
    # involving objects with __del__ defined.
    self.assertEqual(0, len(gc.garbage))
  def testGetVar(self):
    vs = variable_scope._get_default_variable_store()
    v = vs.get_variable("v", [1])
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@ -77,6 +77,45 @@ def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode):
  return handle
 class EagerResourceDeleter(object):
  """An object which cleans up a resource handle.
  An alternative to defining a __del__ method on an object. The intended use is
  that ResourceVariables or other objects with resource handles will maintain a
  single reference to this object. When the parent object is collected, this
  object will be too. Even if the parent object is part of a reference cycle,
  the cycle will be collectable.
  """
  def __init__(self, handle, handle_device):
    self._handle = handle
    self._handle_device = handle_device
  def __del__(self):
    # Resources follow object-identity when executing eagerly, so it is safe to
    # delete the resource we have a handle to. Each Graph has a unique container
    # name, which prevents resource sharing.
    try:
      # This resource was created in eager mode. However, this destructor may be
      # running in graph mode (especially during unit tests). To clean up
      # successfully, we switch back into eager mode temporarily.
      with context.eager_mode():
        with ops.device(self._handle_device):
          gen_resource_variable_ops.destroy_resource_op(
              self._handle, ignore_lookup_error=True)
    except TypeError:
      # Suppress some exceptions, mainly for the case when we're running on
      # module deletion. Things that can go wrong include the context module
      # already being unloaded, self._handle._handle_data no longer being
      # valid, and so on. Printing warnings in these cases is silly
      # (exceptions raised from __del__ are printed as warnings to stderr).
      pass  # 'NoneType' object is not callable when the handle has been
            # partially unloaded.
    except AttributeError:
      pass  # 'NoneType' object has no attribute 'eager_mode' when context has
            # been unloaded. Will catch other module unloads as well.
 def shape_safe_assign_variable_handle(handle, shape, value, name=None):
  """Helper that checks shape compatibility and assigns variable."""
  value_tensor = ops.convert_to_tensor(value)
@ -415,6 +454,15 @@ class ResourceVariable(variables.Variable):
          ops.add_to_collections(collections, self)
        elif ops.GraphKeys.GLOBAL_STEP in collections:
          ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self)
    if not self._in_graph_mode:
      # After the handle has been created, set up a way to clean it up when
      # executing eagerly. We'll hold the only reference to the deleter, so that
      # when this object is garbage collected the deleter will be too. This
      # means ResourceVariables can be part of reference cycles without those
      # cycles being uncollectable, and means that no __del__ will be defined at
      # all in graph mode.
      self._handle_deleter = EagerResourceDeleter(
          handle=self._handle, handle_device=self._handle_device)
  def _init_from_proto(self, variable_def, import_scope=None):
    """Initializes from `VariableDef` proto."""
@ -454,39 +502,6 @@ class ResourceVariable(variables.Variable):
    self._constraint = None
  # LINT.ThenChange(//tensorflow/python/eager/graph_callable.py)
  def __del__(self):
    if not self._in_graph_mode:
      # There is only one ResourceVariable object for each underlying resource
      # (cached in the Graph's VariableStore when created with get_variable), so
      # it is safe to delete the resource we have a handle to. Each Graph has a
      # unique container name in Eager, which prevents resource sharing.
      #
      # The Graph's VariableStore contains strong references to ResourceVariable
      # objects created with get_variable, so this destructor will only be
      # callled once the Graph is garbage collected for those objects. However,
      # explicitly created ResourceVariables (e.g. through tfe.Variable) may be
      # collected earlier.
      try:
        # We have checked that this ResourceVariable was created in Eager
        # mode. However, this destructor may be running in graph mode
        # (especially during unit tests). To clean up successfully, we switch
        # back into Eager temporarily.
        with context.eager_mode():
          with ops.device(self._handle_device):
            gen_resource_variable_ops.destroy_resource_op(
                self._handle, ignore_lookup_error=True)
      except TypeError:
        # Suppress some exceptions, mainly for the case when we're running on
        # module deletion. Things that can go wrong include the context module
        # already being unloaded, self._handle._handle_data no longer being
        # valid, and so on. Printing warnings in these cases is silly
        # (exceptions raised from __del__ are printed as warnings to stderr).
        pass  # 'NoneType' object is not callable when the handle has been
              # partially unloaded.
      except AttributeError:
        pass  # 'NoneType' object has no attribute 'eager_mode' when context has
              # been unloaded. Will catch other module unloads as well.
  def __nonzero__(self):
    return self.__bool__()