mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
PiecewiseLinearTransformOp transform binary predictions specially
Summary: The existing op transforms the input in a general way. It needs M transform mappings to transform an NxM input tensor. But for binary predictions X (Nx2 tensor), we know that X[:, 0] = 1 - X[:, 1]. So we just need one mapping for X[:, 1]. After being transformed, we can compute X[:, 0]. This diff is to handle this. Differential Revision: D4550441 fbshipit-source-id: 42d8c6e88d830c97628ee930b543740a32acf904
This commit is contained in:
parent
718786add7
commit
cb3c41b9a9
|
|
@ -18,7 +18,8 @@ transformation. Each feature dimension has its own piecewise linear
|
|||
transformation function. Therefore the size of piecewise function parameters are
|
||||
all (pieces x prediction_dimensions). Note that in each piece, low bound is
|
||||
excluded while high bound is included. Also the piecewise linear function
|
||||
must be continuous.
|
||||
must be continuous. If the input is binary predictions (Nx2 tensor), set
|
||||
the binary arg to true (see details below).
|
||||
)DOC")
|
||||
.Input(
|
||||
0,
|
||||
|
|
@ -42,6 +43,12 @@ must be continuous.
|
|||
.Arg(
|
||||
"pieces",
|
||||
"int value for the number of pieces for the piecewise linear function")
|
||||
.Arg(
|
||||
"binary",
|
||||
"If set true, we assume the input is a Nx2 tensor. Its first column is "
|
||||
"negative predictions and second column is positive and "
|
||||
"negative + positive = 1. We just need one set of transforms for the "
|
||||
"positive column.")
|
||||
.Output(
|
||||
0,
|
||||
"transforms",
|
||||
|
|
|
|||
|
|
@ -14,20 +14,35 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
|
|||
PiecewiseLinearTransformOp(const OperatorDef& operator_def, Workspace* ws)
|
||||
: Operator<Context>(operator_def, ws) {
|
||||
int num_piece = OperatorBase::GetSingleArgument<int>("pieces", 0);
|
||||
binary_ = OperatorBase::GetSingleArgument<bool>("binary", false);
|
||||
CAFFE_ENFORCE(
|
||||
num_piece > 0,
|
||||
"No pieces specified, please specify pieces through args");
|
||||
range_ = SetPiecewiseLinearFunctionParameter("bounds", num_piece + 1);
|
||||
W_ = SetPiecewiseLinearFunctionParameter("slopes", num_piece);
|
||||
b_ = SetPiecewiseLinearFunctionParameter("intercepts", num_piece);
|
||||
|
||||
CAFFE_ENFORCE_EQ(range_.size(), W_.size());
|
||||
CAFFE_ENFORCE_EQ(range_.size(), b_.size());
|
||||
|
||||
if (binary_) {
|
||||
CAFFE_ENFORCE_EQ(range_.size(), 1);
|
||||
CAFFE_ENFORCE_EQ(W_.size(), 1);
|
||||
CAFFE_ENFORCE_EQ(b_.size(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
bool RunOnDevice() override {
|
||||
return binary_ ? TransformBinary() : TransformGeneral();
|
||||
}
|
||||
|
||||
private:
|
||||
bool TransformGeneral() {
|
||||
auto& X = Input(0);
|
||||
auto* Y = Output(0);
|
||||
DCHECK_EQ(X.ndim(), 2);
|
||||
int N = X.dim32(0);
|
||||
int M = X.dim32(1);
|
||||
TIndex N = X.dim32(0);
|
||||
TIndex M = X.dim32(1);
|
||||
DCHECK_EQ(range_.size(), M);
|
||||
DCHECK_EQ(W_.size(), M);
|
||||
DCHECK_EQ(b_.size(), M);
|
||||
|
|
@ -36,8 +51,8 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
|
|||
const auto* Xdata = X.template data<float>();
|
||||
float* Ydata = Y->template mutable_data<float>();
|
||||
|
||||
for (int j = 0; j < M; ++j) {
|
||||
for (int i = 0; i < N; ++i) {
|
||||
for (TIndex j = 0; j < M; ++j) {
|
||||
for (TIndex i = 0; i < N; ++i) {
|
||||
Ydata[i * M + j] = Piecewise_Linear_Transform(
|
||||
Xdata[i * M + j], range_[j], W_[j], b_[j]);
|
||||
}
|
||||
|
|
@ -45,13 +60,35 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
|
|||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool TransformBinary() {
|
||||
auto& X = Input(0);
|
||||
auto* Y = Output(0);
|
||||
DCHECK_EQ(X.ndim(), 2);
|
||||
TIndex N = X.dim32(0);
|
||||
TIndex M = X.dim32(1);
|
||||
CAFFE_ENFORCE_EQ(
|
||||
M, 2, "If binary is set to true, the input must be Nx2 tensor");
|
||||
Y->ResizeLike(X);
|
||||
const auto* Xdata = X.template data<float>();
|
||||
float* Ydata = Y->template mutable_data<float>();
|
||||
|
||||
for (TIndex i = 0; i < N; ++i) {
|
||||
Ydata[i * M + 1] =
|
||||
Piecewise_Linear_Transform(Xdata[i * M + 1], range_[0], W_[0], b_[0]);
|
||||
Ydata[i * M] = 1.0f - Ydata[i * M + 1];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
vector<vector<T>> SetPiecewiseLinearFunctionParameter(
|
||||
const string& arg,
|
||||
const int denom) {
|
||||
vector<vector<T>> param;
|
||||
vector<T> param_flat = OperatorBase::GetRepeatedArgument<T>(arg);
|
||||
CAFFE_ENFORCE_EQ(param_flat.size() % denom, 0);
|
||||
int num_dim = param_flat.size() / denom;
|
||||
CAFFE_ENFORCE_GT(num_dim, 0);
|
||||
param.resize(num_dim);
|
||||
for (int i = 0; i < num_dim; i++) {
|
||||
param[i] = vector<T>(
|
||||
|
|
@ -86,6 +123,7 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
|
|||
vector<vector<T>> range_;
|
||||
vector<vector<T>> W_;
|
||||
vector<vector<T>> b_;
|
||||
bool binary_;
|
||||
};
|
||||
|
||||
} // namespace caffe2
|
||||
|
|
|
|||
|
|
@ -2076,38 +2076,6 @@ class TestOperators(hu.HypothesisTestCase):
|
|||
self.assertDeviceChecks(dc, op, [X], [0])
|
||||
self.assertGradientChecks(gc, op, [X], 0, [0])
|
||||
|
||||
@given(n=st.integers(10000, 10003), **hu.gcs_cpu_only)
def test_piecewise_linear_transform(self, n, gc, dc):
    """Check PiecewiseLinearTransform against an elementwise reference.

    Builds n pieces per prediction dimension (M=2) and interpolates
    sample i to sit strictly inside piece i of its dimension, so the
    expected output is simply slope * x + intercept per element.
    """
    slopes = np.random.uniform(-1, 1, (2, n)).astype(np.float32)
    intercepts = np.random.uniform(-1, 1, (2, n)).astype(np.float32)
    # Bucket bounds must be strictly increasing: jitter in (0.1, 0.9)
    # added to the integer ramp 0..n keeps each bound below the next.
    bounds = np.random.uniform(0.1, 0.9,
                               (2, n + 1)).astype(np.float32)
    ramp = np.array(list(range(n + 1)))
    bounds[0, :] = bounds[0, :] + ramp
    bounds[1, :] = bounds[1, :] + ramp
    # Place row i of X inside bucket i, for the ease of testing.
    X = np.random.uniform(0, 0.9, (n, 2)).astype(np.float32)
    low = bounds[:, :-1].transpose()
    high = bounds[:, 1:].transpose()
    X = X * low + (1 - X) * high

    op = core.CreateOperator(
        "PiecewiseLinearTransform", ["X"], ["Y"],
        bounds=bounds.flatten().tolist(),
        slopes=slopes.flatten().tolist(),
        intercepts=intercepts.flatten().tolist(),
        pieces=n
    )

    def piecewise(x, *args, **kw):
        # Row i sits in piece i, so the transform is elementwise.
        return [slopes.transpose() * x + intercepts.transpose()]

    self.assertReferenceChecks(gc, op, [X], piecewise)
    self.assertDeviceChecks(dc, op, [X], [0])
|
||||
|
||||
@given(X=hu.tensor(min_dim=1,
|
||||
max_dim=4,
|
||||
elements=st.floats(min_value=-100, max_value=100)),
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from caffe2.python import core
|
||||
from hypothesis import given
|
||||
import hypothesis.strategies as st
|
||||
import caffe2.python.hypothesis_test_util as hu
|
||||
import numpy as np
|
||||
|
||||
import unittest
|
||||
|
||||
|
||||
class TestPiecewiseLinearTransform(hu.HypothesisTestCase):
    @given(n=st.integers(1, 100), **hu.gcs_cpu_only)
    def test_piecewise_linear_transform_general(self, n, gc, dc):
        """General mode: one piecewise transform per prediction dimension.

        Sample i is interpolated to lie strictly inside piece i of its
        dimension, so the expected output is elementwise
        slope * x + intercept.
        """
        slopes = np.random.uniform(-1, 1, (2, n)).astype(np.float32)
        intercepts = np.random.uniform(-1, 1, (2, n)).astype(np.float32)
        # Bucket bounds must be strictly increasing: jitter in (0.1, 0.9)
        # added to the integer ramp 0..n keeps each bound below the next.
        bounds = np.random.uniform(0.1, 0.9,
                                   (2, n + 1)).astype(np.float32)
        ramp = np.array(list(range(n + 1)))
        bounds[0, :] = bounds[0, :] + ramp
        bounds[1, :] = bounds[1, :] + ramp
        # Place row i of X inside bucket i, for the ease of testing.
        X = np.random.uniform(0, 0.9, (n, 2)).astype(np.float32)
        low = bounds[:, :-1].transpose()
        high = bounds[:, 1:].transpose()
        X = X * low + (1 - X) * high

        op = core.CreateOperator(
            "PiecewiseLinearTransform", ["X"], ["Y"],
            bounds=bounds.flatten().tolist(),
            slopes=slopes.flatten().tolist(),
            intercepts=intercepts.flatten().tolist(),
            pieces=n
        )

        def piecewise(x, *args, **kw):
            # Row i sits in piece i, so the transform is elementwise.
            return [slopes.transpose() * x + intercepts.transpose()]

        self.assertReferenceChecks(gc, op, [X], piecewise)
        self.assertDeviceChecks(dc, op, [X], [0])

    @given(n=st.integers(1, 100), **hu.gcs_cpu_only)
    def test_piecewise_linear_transform_binary(self, n, gc, dc):
        """Binary mode: a single transform set for the positive column.

        The op transforms only X[:, 1]; column 0 of the output must come
        out as 1 - transformed positive score.
        """
        slopes = np.random.uniform(-1, 1, size=n).astype(np.float32)
        intercepts = np.random.uniform(-1, 1, size=n).astype(np.float32)
        bounds = np.random.uniform(
            0, 1, n + 1).astype(np.float32)
        bounds.sort()

        # Place the positive score of sample i inside bucket i, and keep
        # the two columns summing to one, as the binary contract requires.
        X = np.random.uniform(0, 0.9, (n, 2)).astype(np.float32)
        X[:, 1] = X[:, 1] * bounds[:-1] + (1 - X[:, 1]) * bounds[1:]
        X[:, 0] = 1 - X[:, 1]

        op = core.CreateOperator(
            "PiecewiseLinearTransform", ["X"], ["Y"],
            bounds=bounds.flatten().tolist(),
            slopes=slopes.flatten().tolist(),
            intercepts=intercepts.flatten().tolist(),
            pieces=n,
            binary=True,
        )

        def piecewise(x):
            # Only the positive column is transformed; the negative
            # column is its complement.
            positive = slopes.transpose() * x[:, 1] + intercepts.transpose()
            return [np.vstack((1 - positive, positive)).transpose()]

        self.assertReferenceChecks(gc, op, [X], piecewise)
        self.assertDeviceChecks(dc, op, [X], [0])
|
||||
|
||||
|
||||
# Allow running this test file directly (outside a test runner).
if __name__ == "__main__":
    unittest.main()
|
||||
Loading…
Reference in New Issue
Block a user