Make fills work with input_as_shape when run in CUDAContext

Summary: If ConstantFill (or another fill op) is used in CUDAContext with input_as_shape, the code crashes because it expects the shape tensor to be in CUDAContext but accesses the array from host code. We could fix this by copying the values from the CUDA tensor, but it is probably best to enforce that the shape input is in CPU context. This is what this diff does.

Differential Revision: D5152766

fbshipit-source-id: 0629a189bd1d800c0b7c9dbc324b78d279efac0b
This commit is contained in:
Aapo Kyrola 2017-05-30 20:37:54 -07:00 committed by Facebook Github Bot
parent 846240a340
commit 96d8ae2163
3 changed files with 27 additions and 19 deletions

View File

@ -24,7 +24,6 @@ REGISTER_CPU_OPERATOR(MSRAFill, MSRAFillOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(RangeFill, RangeFillOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(LengthsRangeFill, LengthsRangeFillOp<CPUContext>);
OPERATOR_SCHEMA(ConstantFill)
.NumInputs(0, 1)
.NumOutputs(1)
@ -64,7 +63,9 @@ NOTE: Currently, it supports data type of float, int32, int64, and bool.
"The additional dimensions appended at the end of the shape indicated"
"by the input blob."
"Cannot set the extra_shape argument when there is no input blob.")
.Arg("input_as_shape", "1D tensor containing the desired output shape")
.Arg(
"input_as_shape",
"1D tensor containing the desired output shape. First input must be in CPU context.")
.Input(0, "input", "Input tensor (optional) to provide shape information.")
.Output(
0,
@ -92,7 +93,9 @@ The shape of the output can be given as argument or input.
.Arg("min", "minimum value, inclusive")
.Arg("max", "maximum value, inclusive")
.Arg("shape", "shape of the output, do not set when input_as_shape=1")
.Arg("input_as_shape", "set to 1 to use the first input as shape")
.Arg(
"input_as_shape",
"set to 1 to use the first input as shape. First input must be in CPU context.")
.Input(
0,
"SHAPE",
@ -136,7 +139,9 @@ input.
"The additional dimensions appended at the end of the shape indicated"
"by the input blob. "
"Cannot set the extra_shape argument when there is no input blob.")
.Arg("input_as_shape", "1D tensor containing the desired output shape")
.Arg(
"input_as_shape",
"1D tensor containing the desired output shape. First input must be in CPU context.")
.Input(0, "input", "Input tensor to provide shape information")
.Input(
1,

View File

@ -53,9 +53,10 @@ class FillerOp : public Operator<Context> {
bool RunOnDevice() override {
auto* output = Operator<Context>::Output(0);
if (InputSize()) {
auto& input = Input(0);
auto shape = vector<TIndex>{};
if (input_as_shape_) {
// Shape input must be in CPU context
auto& input = OperatorBase::Input<Tensor<CPUContext>>(0);
CAFFE_ENFORCE_EQ(
input.ndim(),
1,
@ -64,6 +65,7 @@ class FillerOp : public Operator<Context> {
auto* shape_data = input.template data<TIndex>();
shape.insert(shape.end(), shape_data, shape_data + input.dim32(0));
} else {
auto& input = Input(0);
shape.insert(shape.end(), input.dims().begin(), input.dims().end());
}
shape.insert(shape.end(), extra_shape_.begin(), extra_shape_.end());

View File

@ -6,6 +6,7 @@ from __future__ import unicode_literals
import hypothesis.strategies as st
from caffe2.python import core, workspace
from caffe2.proto import caffe2_pb2
from hypothesis import given
import caffe2.python.hypothesis_test_util as hu
@ -51,29 +52,29 @@ class TestFillerOperator(hu.HypothesisTestCase):
),
a=st.integers(min_value=0, max_value=100),
b=st.integers(min_value=0, max_value=100),
**hu.gcs_cpu_only
**hu.gcs
)
def test_uniform_int_fill_op_blob_input(self, shape, a, b, gc, dc):
net = core.Net('test_net')
shape_blob = net.Const(shape, dtype=np.int64)
with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
shape_blob = net.Const(shape, dtype=np.int64)
a_blob = net.Const(a, dtype=np.int32)
b_blob = net.Const(b, dtype=np.int32)
uniform_fill = net.UniformIntFill([shape_blob, a_blob, b_blob],
1, input_as_shape=1)
for device_option in dc:
net._net.device_option.CopyFrom(device_option)
workspace.RunNetOnce(net)
workspace.RunNetOnce(net)
blob_out = workspace.FetchBlob(uniform_fill)
if b < a:
new_shape = shape[:]
new_shape[0] = 0
np.testing.assert_array_equal(new_shape, blob_out.shape)
else:
np.testing.assert_array_equal(shape, blob_out.shape)
self.assertTrue((blob_out >= a).all())
self.assertTrue((blob_out <= b).all())
blob_out = workspace.FetchBlob(uniform_fill)
if b < a:
new_shape = shape[:]
new_shape[0] = 0
np.testing.assert_array_equal(new_shape, blob_out.shape)
else:
np.testing.assert_array_equal(shape, blob_out.shape)
self.assertTrue((blob_out >= a).all())
self.assertTrue((blob_out <= b).all())
@given(**hu.gcs)
def test_gaussian_fill_op(self, gc, dc):