mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
PiecewiseLinearTransformOp transform binary predictions specially
Summary: The existing op transforms the input in a general way. It needs M transform mappings to transform an NxM input tensor. But for binary predictions X (Nx2 tensor), we know that X[:, 0] = 1 - X[:, 1]. So we just need one mapping for X[:, 1]. After being transformed, we can compute X[:, 0]. This diff is to handle this. Differential Revision: D4550441 fbshipit-source-id: 42d8c6e88d830c97628ee930b543740a32acf904
This commit is contained in:
parent
718786add7
commit
cb3c41b9a9
|
|
@ -18,7 +18,8 @@ transformation. Each feature dimension has its own piecewise linear
|
|||
transformation function. Therefore the size of piecewise function parameters are
|
||||
all (pieces x prediction_dimensions). Note that in each piece, low bound is
|
||||
excluded while high bound is included. Also the piecewise linear function
|
||||
must be continuous.
|
||||
must be continuous. If the input is binary predictions (Nx2 tensor), set
|
||||
the binary arg to true (see details below).
|
||||
)DOC")
|
||||
.Input(
|
||||
0,
|
||||
|
|
@ -42,6 +43,12 @@ must be continuous.
|
|||
.Arg(
|
||||
"pieces",
|
||||
"int value for the number of pieces for the piecewise linear function")
|
||||
.Arg(
|
||||
"binary",
|
||||
"If set true, we assume the input is a Nx2 tensor. Its first column is "
|
||||
"negative predictions and second column is positive and "
|
||||
"negative + positive = 1. We just need one set of transforms for the "
|
||||
"positive column.")
|
||||
.Output(
|
||||
0,
|
||||
"transforms",
|
||||
|
|
|
|||
|
|
@ -14,20 +14,35 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
|
|||
PiecewiseLinearTransformOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws) {
|
||||
int num_piece = OperatorBase::GetSingleArgument<int>("pieces", 0);
|
||||
binary_ = OperatorBase::GetSingleArgument<bool>("binary", false);
|
||||
CAFFE_ENFORCE(
|
||||
num_piece > 0,
|
||||
"No pieces specified, please specify pieces through args");
|
||||
range_ = SetPiecewiseLinearFunctionParameter("bounds", num_piece + 1);
|
||||
W_ = SetPiecewiseLinearFunctionParameter("slopes", num_piece);
|
||||
b_ = SetPiecewiseLinearFunctionParameter("intercepts", num_piece);
|
||||
|
||||
CAFFE_ENFORCE_EQ(range_.size(), W_.size());
|
||||
CAFFE_ENFORCE_EQ(range_.size(), b_.size());
|
||||
|
||||
if (binary_) {
|
||||
CAFFE_ENFORCE_EQ(range_.size(), 1);
|
||||
CAFFE_ENFORCE_EQ(W_.size(), 1);
|
||||
CAFFE_ENFORCE_EQ(b_.size(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
bool RunOnDevice() override {
|
||||
return binary_ ? TransformBinary() : TransformGeneral();
|
||||
}
|
||||
|
||||
private:
|
||||
bool TransformGeneral() {
|
||||
auto& X = Input(0);
|
||||
auto* Y = Output(0);
|
||||
DCHECK_EQ(X.ndim(), 2);
|
||||
int N = X.dim32(0);
|
||||
int M = X.dim32(1);
|
||||
TIndex N = X.dim32(0);
|
||||
TIndex M = X.dim32(1);
|
||||
DCHECK_EQ(range_.size(), M);
|
||||
DCHECK_EQ(W_.size(), M);
|
||||
DCHECK_EQ(b_.size(), M);
|
||||
|
|
@ -36,8 +51,8 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
|
|||
const auto* Xdata = X.template data<float>();
|
||||
float* Ydata = Y->template mutable_data<float>();
|
||||
|
||||
for (int j = 0; j < M; ++j) {
|
||||
for (int i = 0; i < N; ++i) {
|
||||
for (TIndex j = 0; j < M; ++j) {
|
||||
for (TIndex i = 0; i < N; ++i) {
|
||||
Ydata[i * M + j] = Piecewise_Linear_Transform(
|
||||
Xdata[i * M + j], range_[j], W_[j], b_[j]);
|
||||
}
|
||||
|
|
@ -45,13 +60,35 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
|
|||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool TransformBinary() {
|
||||
auto& X = Input(0);
|
||||
auto* Y = Output(0);
|
||||
DCHECK_EQ(X.ndim(), 2);
|
||||
TIndex N = X.dim32(0);
|
||||
TIndex M = X.dim32(1);
|
||||
CAFFE_ENFORCE_EQ(
|
||||
M, 2, "If binary is set to true, the input must be Nx2 tensor");
|
||||
Y->ResizeLike(X);
|
||||
const auto* Xdata = X.template data<float>();
|
||||
float* Ydata = Y->template mutable_data<float>();
|
||||
|
||||
for (TIndex i = 0; i < N; ++i) {
|
||||
Ydata[i * M + 1] =
|
||||
Piecewise_Linear_Transform(Xdata[i * M + 1], range_[0], W_[0], b_[0]);
|
||||
Ydata[i * M] = 1.0f - Ydata[i * M + 1];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
vector<vector<T>> SetPiecewiseLinearFunctionParameter(
|
||||
const string& arg,
|
||||
const int denom) {
|
||||
vector<vector<T>> param;
|
||||
vector<T> param_flat = OperatorBase::GetRepeatedArgument<T>(arg);
|
||||
CAFFE_ENFORCE_EQ(param_flat.size() % denom, 0);
|
||||
int num_dim = param_flat.size() / denom;
|
||||
CAFFE_ENFORCE_GT(num_dim, 0);
|
||||
param.resize(num_dim);
|
||||
for (int i = 0; i < num_dim; i++) {
|
||||
param[i] = vector<T>(
|
||||
|
|
@ -86,6 +123,7 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
|
|||
vector<vector<T>> range_;
|
||||
vector<vector<T>> W_;
|
||||
vector<vector<T>> b_;
|
||||
bool binary_;
|
||||
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
|
|
|||
|
|
@ -2076,38 +2076,6 @@ class TestOperators(hu.HypothesisTestCase):
|
|||
self.assertDeviceChecks(dc, op, [X], [0])
|
||||
self.assertGradientChecks(gc, op, [X], 0, [0])
|
||||
|
||||
@given(n=st.integers(10000, 10003), **hu.gcs_cpu_only)
def test_piecewise_linear_transform(self, n, gc, dc):
    """Check PiecewiseLinearTransform against an elementwise reference.

    Builds n pieces per prediction dimension (M=2) and interpolates
    sample i to sit strictly inside piece i of its dimension, so the
    expected output is simply slope * x + intercept per element.
    """
    slopes = np.random.uniform(-1, 1, (2, n)).astype(np.float32)
    intercepts = np.random.uniform(-1, 1, (2, n)).astype(np.float32)
    # Bucket bounds must be strictly increasing: jitter in (0.1, 0.9)
    # added to the integer ramp 0..n keeps each bound below the next.
    bounds = np.random.uniform(0.1, 0.9,
                               (2, n + 1)).astype(np.float32)
    ramp = np.array(list(range(n + 1)))
    bounds[0, :] = bounds[0, :] + ramp
    bounds[1, :] = bounds[1, :] + ramp
    # Place row i of X inside bucket i, for the ease of testing.
    X = np.random.uniform(0, 0.9, (n, 2)).astype(np.float32)
    low = bounds[:, :-1].transpose()
    high = bounds[:, 1:].transpose()
    X = X * low + (1 - X) * high

    op = core.CreateOperator(
        "PiecewiseLinearTransform", ["X"], ["Y"],
        bounds=bounds.flatten().tolist(),
        slopes=slopes.flatten().tolist(),
        intercepts=intercepts.flatten().tolist(),
        pieces=n
    )

    def piecewise(x, *args, **kw):
        # Row i sits in piece i, so the transform is elementwise.
        return [slopes.transpose() * x + intercepts.transpose()]

    self.assertReferenceChecks(gc, op, [X], piecewise)
    self.assertDeviceChecks(dc, op, [X], [0])
|
||||
|
||||
@given(X=hu.tensor(min_dim=1,
|
||||
max_dim=4,
|
||||
elements=st.floats(min_value=-100, max_value=100)),
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from caffe2.python import core
|
||||
from hypothesis import given
|
||||
import hypothesis.strategies as st
|
||||
import caffe2.python.hypothesis_test_util as hu
|
||||
import numpy as np
|
||||
|
||||
import unittest
|
||||
|
||||
|
||||
class TestPiecewiseLinearTransform(hu.HypothesisTestCase):
    @given(n=st.integers(1, 100), **hu.gcs_cpu_only)
    def test_piecewise_linear_transform_general(self, n, gc, dc):
        """General mode: one piecewise transform per prediction dimension.

        Sample i is interpolated to lie strictly inside piece i of its
        dimension, so the expected output is elementwise
        slope * x + intercept.
        """
        slopes = np.random.uniform(-1, 1, (2, n)).astype(np.float32)
        intercepts = np.random.uniform(-1, 1, (2, n)).astype(np.float32)
        # Bucket bounds must be strictly increasing: jitter in (0.1, 0.9)
        # added to the integer ramp 0..n keeps each bound below the next.
        bounds = np.random.uniform(0.1, 0.9,
                                   (2, n + 1)).astype(np.float32)
        ramp = np.array(list(range(n + 1)))
        bounds[0, :] = bounds[0, :] + ramp
        bounds[1, :] = bounds[1, :] + ramp
        # Place row i of X inside bucket i, for the ease of testing.
        X = np.random.uniform(0, 0.9, (n, 2)).astype(np.float32)
        low = bounds[:, :-1].transpose()
        high = bounds[:, 1:].transpose()
        X = X * low + (1 - X) * high

        op = core.CreateOperator(
            "PiecewiseLinearTransform", ["X"], ["Y"],
            bounds=bounds.flatten().tolist(),
            slopes=slopes.flatten().tolist(),
            intercepts=intercepts.flatten().tolist(),
            pieces=n
        )

        def piecewise(x, *args, **kw):
            # Row i sits in piece i, so the transform is elementwise.
            return [slopes.transpose() * x + intercepts.transpose()]

        self.assertReferenceChecks(gc, op, [X], piecewise)
        self.assertDeviceChecks(dc, op, [X], [0])

    @given(n=st.integers(1, 100), **hu.gcs_cpu_only)
    def test_piecewise_linear_transform_binary(self, n, gc, dc):
        """Binary mode: a single transform set for the positive column.

        The op transforms only X[:, 1]; column 0 of the output must come
        out as 1 - transformed positive score.
        """
        slopes = np.random.uniform(-1, 1, size=n).astype(np.float32)
        intercepts = np.random.uniform(-1, 1, size=n).astype(np.float32)
        bounds = np.random.uniform(
            0, 1, n + 1).astype(np.float32)
        bounds.sort()

        # Place the positive score of sample i inside bucket i, and keep
        # the two columns summing to one, as the binary contract requires.
        X = np.random.uniform(0, 0.9, (n, 2)).astype(np.float32)
        X[:, 1] = X[:, 1] * bounds[:-1] + (1 - X[:, 1]) * bounds[1:]
        X[:, 0] = 1 - X[:, 1]

        op = core.CreateOperator(
            "PiecewiseLinearTransform", ["X"], ["Y"],
            bounds=bounds.flatten().tolist(),
            slopes=slopes.flatten().tolist(),
            intercepts=intercepts.flatten().tolist(),
            pieces=n,
            binary=True,
        )

        def piecewise(x):
            # Only the positive column is transformed; the negative
            # column is its complement.
            positive = slopes.transpose() * x[:, 1] + intercepts.transpose()
            return [np.vstack((1 - positive, positive)).transpose()]

        self.assertReferenceChecks(gc, op, [X], piecewise)
        self.assertDeviceChecks(dc, op, [X], [0])
|
||||
|
||||
|
||||
# Allow running this test file directly (outside a test runner).
if __name__ == "__main__":
    unittest.main()
|
||||
Loading…
Reference in New Issue
Block a user