Avoid uncollectable cycles with a separate deleter object for resources.

PiperOrigin-RevId: 173972515
2025-12-06 12:20:11 +01:00 · 2017-10-30 17:05:22 -07:00 · 2017-10-30 17:05:22 -07:00 · 309e340619
commit 309e340619
parent 73fdaf0b56
6 changed files with 75 additions and 49 deletions
--- a/tensorflow/contrib/eager/python/datasets.py
+++ b/tensorflow/contrib/eager/python/datasets.py
@@ -80,14 +80,11 @@ class Iterator(object):
          output_types=self._flat_output_types,
          output_shapes=self._flat_output_shapes)
      gen_dataset_ops.make_iterator(ds_variant, self._resource)
+      # Hold a deleter that destroys the resource when this object is collected.
+      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
+          handle=self._resource, handle_device="/device:CPU:0")
    self._device = context.context().device_name

-  def __del__(self):
-    if self._resource is not None:
-      with ops.device("/device:CPU:0"), context.eager_mode():
-        resource_variable_ops.destroy_resource_op(self._resource)
-    self._resource = None
-
  def __iter__(self):
    return self

--- a/tensorflow/contrib/eager/python/summary_writer.py
+++ b/tensorflow/contrib/eager/python/summary_writer.py
@@ -114,11 +114,9 @@ class SummaryWriter(object):
      self._resource = gen_summary_ops.summary_writer(shared_name=self._name)
      gen_summary_ops.create_summary_file_writer(
          self._resource, logdir, max_queue, flush_secs, filename_suffix)
-
-  def __del__(self):
-    if self._resource:
-      resource_variable_ops.destroy_resource_op(self._resource)
-      self._resource = None
+      # Hold a deleter that destroys the resource when this object is collected.
+      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
+          handle=self._resource, handle_device=self._CPU_DEVICE)

  def step(self):
    """Increment the global step counter of this SummaryWriter instance."""
--- a/tensorflow/contrib/summary/summary_ops.py
+++ b/tensorflow/contrib/summary/summary_ops.py
@@ -92,10 +92,9 @@ class SummaryWriter(object):

  def __init__(self, resource):
    self._resource = resource
-
-  def __del__(self):
    if context.in_eager_mode():
-      resource_variable_ops.destroy_resource_op(self._resource)
+      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
+          handle=self._resource, handle_device="cpu:0")

  def set_as_default(self):
    context.context().summary_writer_resource = self._resource
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -17,6 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import gc
+
 import numpy as np

 from tensorflow.python.eager import context
@@ -38,6 +40,12 @@ from tensorflow.python.platform import test

 class ResourceVariableOpsTest(test_util.TensorFlowTestCase):

+  def tearDown(self):
+    gc.collect()
+    # gc.garbage holds only uncollectable objects: before Python 3.4 (PEP 442)
+    # that means reference cycles involving objects with __del__ defined.
+    self.assertEqual(0, len(gc.garbage))
+
  def testHandleDtypeShapeMatch(self):
    with self.test_session():
      handle = resource_variable_ops.var_handle_op(dtype=dtypes.int32, shape=[])
@@ -477,10 +485,11 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
    with context.eager_mode():
      var = resource_variable_ops.ResourceVariable(initial_value=1.0,
                                                   name="var8")
-      var.__del__()
+      var_handle = var._handle
+      del var
      with self.assertRaisesRegexp(errors.NotFoundError,
                                   r"Resource .* does not exist."):
-        resource_variable_ops.destroy_resource_op(var._handle,
+        resource_variable_ops.destroy_resource_op(var_handle,
                                                  ignore_lookup_error=False)

  def testScatterUpdate(self):
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import gc
+
 import numpy

 from tensorflow.python.eager import context
@@ -39,6 +41,12 @@ from tensorflow.python.platform import test

 class VariableScopeTest(test.TestCase):

+  def tearDown(self):
+    gc.collect()
+    # gc.garbage holds only uncollectable objects: before Python 3.4 (PEP 442)
+    # that means reference cycles involving objects with __del__ defined.
+    self.assertEqual(0, len(gc.garbage))
+
  def testGetVar(self):
    vs = variable_scope._get_default_variable_store()
    v = vs.get_variable("v", [1])
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -77,6 +77,45 @@ def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode):
  return handle


+class EagerResourceDeleter(object):
+  """An object which cleans up a resource handle.
+
+  An alternative to defining a __del__ method on an object. The intended use is
+  that ResourceVariables or other objects with resource handles will maintain
+  the only reference to this object, so it is collected along with its parent.
+  It stores just the handle and its device, never the parent, so it stays out
+  of any reference cycle the parent is part of and the cycle stays collectable.
+  """
+
+  def __init__(self, handle, handle_device):
+    self._handle = handle
+    self._handle_device = handle_device
+
+  def __del__(self):
+    # Resources follow object-identity when executing eagerly, so it is safe to
+    # delete the resource we have a handle to. Each Graph has a unique container
+    # name, which prevents resource sharing.
+    try:
+      # This resource was created in eager mode. However, this destructor may be
+      # running in graph mode (especially during unit tests). To clean up
+      # successfully, we switch back into eager mode temporarily.
+      with context.eager_mode():
+        with ops.device(self._handle_device):
+          gen_resource_variable_ops.destroy_resource_op(
+              self._handle, ignore_lookup_error=True)
+    except TypeError:
+      # Suppress some exceptions, mainly for the case when this destructor runs
+      # while modules are being unloaded. Things that can go wrong include the
+      # context module already being gone, self._handle._handle_data no longer
+      # being valid, and so on. Warning about these cases would only be noise
+      # (exceptions raised from __del__ are printed as warnings to stderr).
+      pass  # 'NoneType' object is not callable when the handle has been
+            # partially unloaded.
+    except AttributeError:
+      pass  # 'NoneType' object has no attribute 'eager_mode' when context has
+            # been unloaded. Will catch other module unloads as well.
+
+
 def shape_safe_assign_variable_handle(handle, shape, value, name=None):
  """Helper that checks shape compatibility and assigns variable."""
  value_tensor = ops.convert_to_tensor(value)
@@ -415,6 +454,15 @@ class ResourceVariable(variables.Variable):
          ops.add_to_collections(collections, self)
        elif ops.GraphKeys.GLOBAL_STEP in collections:
          ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self)
+    if not self._in_graph_mode:
+      # After the handle has been created, set up a way to clean it up when
+      # executing eagerly. We'll hold the only reference to the deleter, so that
+      # when this object is garbage collected the deleter will be too. This
+      # means ResourceVariables can be part of reference cycles without those
+      # cycles being uncollectable, and means that no __del__ will be defined at
+      # all in graph mode.
+      self._handle_deleter = EagerResourceDeleter(
+          handle=self._handle, handle_device=self._handle_device)

  def _init_from_proto(self, variable_def, import_scope=None):
    """Initializes from `VariableDef` proto."""
@@ -454,39 +502,6 @@ class ResourceVariable(variables.Variable):
    self._constraint = None
  # LINT.ThenChange(//tensorflow/python/eager/graph_callable.py)

-  def __del__(self):
-    if not self._in_graph_mode:
-      # There is only one ResourceVariable object for each underlying resource
-      # (cached in the Graph's VariableStore when created with get_variable), so
-      # it is safe to delete the resource we have a handle to. Each Graph has a
-      # unique container name in Eager, which prevents resource sharing.
-      #
-      # The Graph's VariableStore contains strong references to ResourceVariable
-      # objects created with get_variable, so this destructor will only be
-      # callled once the Graph is garbage collected for those objects. However,
-      # explicitly created ResourceVariables (e.g. through tfe.Variable) may be
-      # collected earlier.
-      try:
-        # We have checked that this ResourceVariable was created in Eager
-        # mode. However, this destructor may be running in graph mode
-        # (especially during unit tests). To clean up successfully, we switch
-        # back into Eager temporarily.
-        with context.eager_mode():
-          with ops.device(self._handle_device):
-            gen_resource_variable_ops.destroy_resource_op(
-                self._handle, ignore_lookup_error=True)
-      except TypeError:
-        # Suppress some exceptions, mainly for the case when we're running on
-        # module deletion. Things that can go wrong include the context module
-        # already being unloaded, self._handle._handle_data no longer being
-        # valid, and so on. Printing warnings in these cases is silly
-        # (exceptions raised from __del__ are printed as warnings to stderr).
-        pass  # 'NoneType' object is not callable when the handle has been
-              # partially unloaded.
-      except AttributeError:
-        pass  # 'NoneType' object has no attribute 'eager_mode' when context has
-              # been unloaded. Will catch other module unloads as well.
-
  def __nonzero__(self):
    return self.__bool__()