Fixing CPU implementation of parallel_stack for tensors with non-zero rank.

PiperOrigin-RevId: 170942814
Authored by Jiri Simsa on 2017-10-03 17:39:55 -07:00; committed by TensorFlower Gardener
parent 4cf61262ae
commit b959da92f9
2 changed files with 53 additions and 8 deletions


@@ -34,7 +34,7 @@ namespace functor {
 template <typename Device, typename T>
 Status DoParallelConcatUpdate(const Device& d, const Tensor& value,
                               int32 loc, Tensor* output) {
-  auto Tvalue = value.flat_outer_dims<T>();
+  auto Tvalue = value.shaped<T, 2>({1, value.NumElements()});
   auto Toutput = output->flat_outer_dims<T>();
   auto nrows = Toutput.dimension(0);
   auto r = (loc % nrows + nrows) % nrows;  // Guard index range.

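The one-line kernel change above is the core of the fix: flat_outer_dims<T>() keeps the input's first dimension and collapses the rest, so for any input with non-zero rank the resulting 2-D view was not a single row, and the row copy into the output only saw part of the value. Reshaping the input to {1, NumElements()} makes the entire value one row of the output's (num_inputs, elements_per_input) view. Below is a minimal NumPy sketch of that row-update semantics; the function and variable names are illustrative, not taken from the kernel.

import numpy as np

def parallel_concat_update(value, loc, output):
  # View the output as (num_rows, elements_per_row), mirroring flat_outer_dims.
  out2d = output.reshape(output.shape[0], -1)
  nrows = out2d.shape[0]
  r = (loc % nrows + nrows) % nrows  # guard the index range, as in the kernel
  # The fix: treat the whole input as a single 1 x NumElements() row.
  out2d[r:r + 1, :] = value.reshape(1, -1)
  return output

# Stacking three 3x2 inputs: each one fills a full 1x6 row of the (3, 6) view.
xs = [np.full((3, 2), float(i), dtype=np.float32) for i in range(3)]
out = np.zeros((3, 3, 2), dtype=np.float32)
for i, x in enumerate(xs):
  parallel_concat_update(x, i, out)
assert all((out[i] == x).all() for i, x in enumerate(xs))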

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Functional tests for Pack Op."""
+"""Functional tests for Stack and ParallelStack Ops."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -54,7 +54,16 @@ class StackOpTest(test.TestCase):
         c = array_ops.stack(xs)
         self.assertAllEqual(c.eval(), data)
 
-  def testSimpleParallel(self):
+  def testSimpleParallelCPU(self):
+    np.random.seed(7)
+    with self.test_session(use_gpu=False):
+      for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
+        data = np.random.randn(*shape).astype(np.float32)
+        xs = list(map(constant_op.constant, data))
+        c = array_ops.parallel_stack(xs)
+        self.assertAllEqual(c.eval(), data)
+
+  def testSimpleParallelGPU(self):
     np.random.seed(7)
     with self.test_session(use_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
@@ -87,7 +96,21 @@ class StackOpTest(test.TestCase):
         b = array_ops.reshape(a, array_ops.stack([2, 3]))
         self.assertAllEqual(b.get_shape(), [2, 3])
 
-  def testConstParallel(self):
+  def testConstParallelCPU(self):
+    np.random.seed(7)
+    with self.test_session(use_gpu=False):
+      for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
+        data = np.random.randn(*shape).astype(np.float32)
+        if len(shape) == 1:
+          data_list = list(data)
+          cl = array_ops.parallel_stack(data_list)
+          self.assertAllEqual(cl.eval(), data)
+
+        data = np.random.randn(*shape).astype(np.float32)
+        c = array_ops.parallel_stack(data)
+        self.assertAllEqual(c.eval(), data)
+
+  def testConstParallelGPU(self):
     np.random.seed(7)
     with self.test_session(use_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
@ -127,7 +150,18 @@ class StackOpTest(test.TestCase):
err = gradient_checker.compute_gradient_error(xs, shapes, c, out_shape)
self.assertLess(err, 1e-6)
def testZeroSize(self):
def testZeroSizeCPU(self):
# Verify that stack doesn't crash for zero size inputs
with self.test_session(use_gpu=False):
for shape in (0,), (3, 0), (0, 3):
x = np.zeros((2,) + shape).astype(np.int32)
p = array_ops.stack(list(x)).eval()
self.assertAllEqual(p, x)
p = array_ops.parallel_stack(list(x)).eval()
self.assertAllEqual(p, x)
def testZeroSizeGPU(self):
# Verify that stack doesn't crash for zero size inputs
with self.test_session(use_gpu=True):
for shape in (0,), (3, 0), (0, 3):
@@ -138,14 +172,25 @@ class StackOpTest(test.TestCase):
         p = array_ops.parallel_stack(list(x)).eval()
         self.assertAllEqual(p, x)
 
-  def testAxis0Default(self):
+  def testAxis0DefaultCPU(self):
+    with self.test_session(use_gpu=False):
+      t = [constant_op.constant([1, 2, 3]), constant_op.constant([4, 5, 6])]
+      stacked = array_ops.stack(t).eval()
+      parallel_stacked = array_ops.parallel_stack(t).eval()
+
+    expected = np.array([[1, 2, 3], [4, 5, 6]])
+    self.assertAllEqual(stacked, expected)
+    self.assertAllEqual(parallel_stacked, expected)
+
+  def testAxis0DefaultGPU(self):
     with self.test_session(use_gpu=True):
       t = [constant_op.constant([1, 2, 3]), constant_op.constant([4, 5, 6])]
       stacked = array_ops.stack(t).eval()
       parallel_stacked = array_ops.parallel_stack(t).eval()
 
-    self.assertAllEqual(stacked, np.array([[1, 2, 3], [4, 5, 6]]))
-    self.assertAllEqual(parallel_stacked, np.array([[1, 2, 3], [4, 5, 6]]))
+    expected = np.array([[1, 2, 3], [4, 5, 6]])
+    self.assertAllEqual(stacked, expected)
+    self.assertAllEqual(parallel_stacked, expected)
 
   def testAgainstNumpy(self):
     # For 1 to 5 dimensions.
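The new *CPU test variants above pin the session to the CPU so that the fixed kernel, rather than the GPU implementation, handles inputs with non-zero rank. A rough equivalent outside the test harness, using the TF 1.x API the test file also uses (the session configuration here is just one common way to force CPU placement, not taken from the commit):

import numpy as np
import tensorflow as tf

# parallel_stack of rank-2 inputs on the CPU: the case the kernel fix enables.
data = np.random.randn(4, 3, 2).astype(np.float32)
xs = [tf.constant(x) for x in data]  # four 3x2 tensors
stacked = tf.parallel_stack(xs)      # shape (4, 3, 2)

# Hide GPUs from the session so the CPU kernel is exercised.
with tf.Session(config=tf.ConfigProto(device_count={"GPU": 0})) as sess:
  result = sess.run(stacked)
assert np.array_equal(result, data)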