pytorch/caffe2/python/op/python_op.cpp
2016-07-21 11:26:41 -07:00

257 lines
7.4 KiB
C++

#include <unordered_map>
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
// Produce deprecation warnings (needs to come before arrayobject.h inclusion).
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <boost/make_shared.hpp>
#include <boost/python.hpp>
#include <boost/python/raw_function.hpp>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
#include <numpy/arrayobject.h>
// Temporary solution for numpy < 1.7 versions: old macro, no promises.
// You're strongly advised to upgrade to >= 1.7.
#ifndef NPY_ARRAY_C_CONTIGUOUS
#define NPY_ARRAY_C_CONTIGUOUS NPY_C_CONTIGUOUS
#define PyArray_SetBaseObject(arr, x) (PyArray_BASE(arr) = (x))
#endif
namespace caffe2 {
namespace bp = boost::python;
namespace detail {
class PythonGuard : private boost::noncopyable {
public:
PythonGuard() : gstate_(PyGILState_Ensure()) {}
~PythonGuard() {
PyGILState_Release(gstate_);
}
private:
PyGILState_STATE gstate_;
};
// Maps a registry key (a token, or "<token>_gradient") to a Python callable.
using FuncRegistery = std::unordered_map<std::string, bp::object>;

// Returns the single shared registry, creating it on first use.
static FuncRegistery& gRegistery() {
  // Heap-allocated and deliberately never deleted: the objects registered
  // here are always leaked.
  static auto* const registry = new FuncRegistery;
  return *registry;
}
// Returns a reference to the registry entry stored under `token`.
// The lookup goes through operator[], so an unknown token inserts a
// default-constructed bp::object and returns a reference to that new entry.
bp::object& getFunc(const std::string& token) {
  auto& registry = gRegistery();
  return registry[token];
}
// Returns a reference to the registry entry stored under "<token>_gradient".
// The lookup goes through operator[], so a missing key inserts a
// default-constructed bp::object and returns a reference to that new entry.
bp::object& getGradientFunc(const std::string& token) {
  const std::string gradientKey = token + "_gradient";
  return gRegistery()[gradientKey];
}
// Registers a Python callable and returns the token it is stored under.
//
// The token is the callable's __name__ followed by a decimal number. The
// number starts at the current registry size and is bumped until the resulting
// token is not already a key in the registry, so an existing entry is never
// overwritten.
//
// func: the Python callable to register; must not be None (CHECK-enforced).
// Returns: the unique token, to be used for later lookups of this callable.
std::string registerFunc(const bp::object& func) {
  CHECK(!func.is_none());
  const std::string name = bp::extract<std::string>(func.attr("__name__"));
  auto& registry = gRegistery();
  // Unique name since registry is never cleared. Name + size alone is
  // ambiguous (e.g. "f1" at size 0 and "f" at size 10 both yield "f10"), so
  // uniqueness is verified on the generated token itself, not on the bare
  // function name.
  FuncRegistery::size_type suffix = registry.size();
  std::string token = name + std::to_string(suffix);
  while (registry.find(token) != registry.end()) {
    ++suffix;
    token = name + std::to_string(suffix);
  }
  registry[token] = func;
  return token;
}
// Stores `func` as the gradient callable for an already-registered token.
//
// token_: Python string holding a key that is already present in the
//         registry; must not be None.
// func:   the gradient callable; must not be None.
// Returns a default-constructed bp::object. All preconditions are enforced
// with CHECK.
bp::object registerGradientFunc(
    const bp::object& token_,
    const bp::object& func) {
  CHECK(!token_.is_none());
  CHECK(!func.is_none());
  const std::string token = bp::extract<std::string>(token_);
  // The token must already be a key in the registry.
  CHECK(gRegistery().find(token) != gRegistery().end());
  const std::string gradientKey = token + "_gradient";
  gRegistery()[gradientKey] = func;
  bp::object nothing;
  return nothing;
}
// Result-converter generator used as NdarrayCallPolicies::result_converter.
// apply<T>::type is the functor that turns a wrapped function's C++ return
// value of type T into a PyObject*. Only the float* specialization below is
// defined.
struct NdarrayConverterGenerator {
template <typename T>
struct apply;
};
// Converter for wrapped functions that return float*: the pointer is wrapped
// in a NumPy array object.
template <>
struct NdarrayConverterGenerator::apply<float*> {
struct type {
// Builds a 0-dimensional NPY_FLOAT32 array whose data pointer is `data`.
PyObject* operator()(float* data) const {
// Just store the data pointer, and add the shape information in postcall.
return PyArray_SimpleNewFromData(0, nullptr, NPY_FLOAT32, data);
}
// Reports the NumPy ndarray type object as the Python type of the result.
const PyTypeObject* get_pytype() {
return &PyArray_Type;
}
};
};
// Call policies for wrapped functions that return a float* into a TensorCPU's
// storage. The result converter produces a shapeless placeholder array that
// only carries the data pointer; postcall() swaps it for an array shaped like
// the tensor and ties the tensor's Python object to that array as its base.
struct NdarrayCallPolicies : public bp::default_call_policies {
  typedef NdarrayConverterGenerator result_converter;

  // pyargs: the Python argument tuple of the call; element 0 must be the
  //         TensorCPU the data pointer came from.
  // result: the placeholder array built by NdarrayConverterGenerator; the
  //         reference to it is consumed here.
  // Returns a new reference to a float32 array with the tensor's dimensions,
  // or nullptr if `result` is null or the array could not be created.
  PyObject* postcall(PyObject* pyargs, PyObject* result) {
    // The result converter failed; there is no placeholder to unwrap.
    if (result == nullptr) {
      return nullptr;
    }
    bp::object pyblob = bp::extract<bp::tuple>(pyargs)()[0];
    boost::shared_ptr<TensorCPU> blob =
        bp::extract<boost::shared_ptr<TensorCPU>>(pyblob);
    // Free the temporary pointer-holding array, and construct a new one with
    // the shape information from the blob.
    void* data = PyArray_DATA(reinterpret_cast<PyArrayObject*>(result));
    Py_DECREF(result);
    const int num_axes = blob->ndim();
    std::vector<npy_intp> dims(blob->dims().begin(), blob->dims().end());
    PyObject* arr_obj =
        PyArray_SimpleNewFromData(num_axes, dims.data(), NPY_FLOAT32, data);
    // Array creation failed; return before touching the null array below.
    if (arr_obj == nullptr) {
      return nullptr;
    }
    // SetBaseObject steals a ref, so we need to INCREF.
    Py_INCREF(pyblob.ptr());
    PyArray_SetBaseObject(
        reinterpret_cast<PyArrayObject*>(arr_obj), pyblob.ptr());
    return arr_obj;
  }
};
// Builds a Python tuple from the tensor's dims().
bp::tuple TensorCPU_shape(const TensorCPU& t) {
  const auto& extents = t.dims();
  return bp::tuple(extents);
}
// Resizes `t` to the dimensions listed in the Python tuple `dims_`.
// Each tuple element is extracted as an int64_t. Returns a
// default-constructed bp::object.
bp::object TensorCPU_reshape(TensorCPU* t, const bp::tuple& dims_) {
  const auto rank = bp::len(dims_);
  std::vector<TIndex> extents;
  extents.reserve(rank);
  for (int axis = 0; axis < rank; ++axis) {
    const int64_t extent = bp::extract<int64_t>(dims_[axis]);
    extents.push_back(extent);
  }
  t->Resize(extents);
  return bp::object();
}
}
// Common base for operators whose computation is a registered Python callable.
// Subclasses supply the callable through getFunc(); RunOnDevice() calls it
// with the operator's input and output tensors.
class PythonOpBase : public Operator<CPUContext> {
 public:
  using Operator::Operator;

  // Collects pointers to every input and output tensor and calls the Python
  // callable as pyFunc(inputs, outputs). A missing callable trips a CHECK; a
  // Python exception is printed and then reported through LOG(FATAL).
  // Returns true after the call completes.
  bool RunOnDevice() final {
    std::vector<TensorCPU*> inputs;
    inputs.reserve(InputSize());
    for (auto i = 0; i < InputSize(); ++i) {
      // Inputs are exposed to Python through the same mutable tensor wrapper
      // as outputs, hence the const_cast.
      inputs.push_back(const_cast<TensorCPU*>(&Input(i)));
    }
    std::vector<TensorCPU*> outputs;
    outputs.reserve(OutputSize());
    for (auto i = 0; i < OutputSize(); ++i) {
      outputs.push_back(Output(i));
    }
    {
      // Take the GIL before looking the callable up, not just before calling
      // it: the registry lookups use operator[], which may insert a
      // default-constructed bp::object, so the lookup itself can touch Python
      // objects and modify the shared registry.
      detail::PythonGuard g;
      auto& pyFunc = getFunc();
      CHECK(!pyFunc.is_none());
      try {
        pyFunc(inputs, outputs);
      } catch (bp::error_already_set&) {
        PyErr_Print();
        LOG(FATAL) << "Exception in Python operator for token: "
                   << OperatorBase::GetSingleArgument<std::string>("token", "");
      }
    }
    return true;
  }

 private:
  // Returns the Python callable this operator should run.
  virtual bp::object& getFunc() = 0;
};
// Operator that runs the callable registered under its "token" argument.
class PythonOp final : public PythonOpBase {
 public:
  using PythonOpBase::PythonOpBase;

 private:
  // Reads the "token" string argument (empty string if absent) and returns
  // the matching entry from detail::getFunc.
  bp::object& getFunc() override {
    return detail::getFunc(
        OperatorBase::GetSingleArgument<std::string>("token", ""));
  }
};
// Operator that runs the gradient callable associated with its "token"
// argument.
class PythonGradientOp final : public PythonOpBase {
 public:
  using PythonOpBase::PythonOpBase;

 private:
  // Reads the "token" string argument (empty string if absent) and returns
  // the matching entry from detail::getGradientFunc.
  bp::object& getFunc() override {
    const auto forwardToken =
        OperatorBase::GetSingleArgument<std::string>("token", "");
    return detail::getGradientFunc(forwardToken);
  }
};
// Defines the `python_ops_python` extension module: the TensorCPU wrapper, the
// vector types passed to Python callables, and the two registration functions.
BOOST_PYTHON_MODULE(python_ops_python) {
  using TensorPtrVec = std::vector<TensorCPU*>;
  using IndexVec = std::vector<TIndex>;

  // TensorCPU is held by boost::shared_ptr and cannot be copied from Python.
  bp::class_<TensorCPU, boost::shared_ptr<TensorCPU>, boost::noncopyable>
      tensorClass("TensorCPU");
  // `data` goes through NdarrayCallPolicies, which builds the returned array
  // from the float pointer and the tensor's dimensions.
  tensorClass.add_property(
      "data",
      bp::make_function(
          &TensorCPU::template mutable_data<float>,
          detail::NdarrayCallPolicies()));
  tensorClass.add_property("shape", detail::TensorCPU_shape);
  tensorClass.def("reshape", detail::TensorCPU_reshape);

  // Container of raw tensor pointers (indexing suite with NoProxy = true).
  bp::class_<TensorPtrVec>("RawTensorVec")
      .def(bp::vector_indexing_suite<TensorPtrVec, true>());
  // Container of tensor dimension values.
  bp::class_<IndexVec>("IntVec").def(bp::vector_indexing_suite<IndexVec>());

  bp::def(
      "register",
      detail::registerFunc,
      bp::args("func"),
      "Register a function, returning a token");
  bp::def(
      "register_gradient",
      detail::registerGradientFunc,
      bp::args("token", "func"),
      "Register a gradient function for a token");

  // boost python expects a void (missing) return value, while import_array
  // returns NULL for python3. import_array1() forces a void return value.
  import_array1();
}
namespace {
// Gradient maker for the Python operator. It emits a single "PythonGradient"
// operator definition whose inputs are, in order, all forward inputs, all
// forward outputs and all output gradients, and whose outputs are the
// gradients of the forward inputs.
struct GetPythonGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;

  std::vector<OperatorDef> GetGradientDefs() override {
    const int numInputs = def_.input_size();
    const int numOutputs = def_.output_size();

    std::vector<std::string> gradIns;
    gradIns.reserve(numInputs + 2 * numOutputs);
    for (int idx = 0; idx < numInputs; ++idx) {
      gradIns.push_back(I(idx));
    }
    for (int idx = 0; idx < numOutputs; ++idx) {
      gradIns.push_back(O(idx));
    }
    for (int idx = 0; idx < numOutputs; ++idx) {
      gradIns.push_back(GO(idx));
    }

    std::vector<std::string> gradOuts;
    gradOuts.reserve(numInputs);
    for (int idx = 0; idx < numInputs; ++idx) {
      gradOuts.push_back(GI(idx));
    }

    return SingleGradientDef("PythonGradient", "", gradIns, gradOuts);
  }
};
// CPU operator registrations: operator name "Python" is backed by PythonOp and
// "PythonGradient" by PythonGradientOp.
REGISTER_CPU_OPERATOR(Python, PythonOp);
REGISTER_CPU_OPERATOR(PythonGradient, PythonGradientOp);
// Always allow running in-place
// (the predicate ignores both of its int arguments and returns true).
OPERATOR_SCHEMA(Python).AllowInplace([](int, int) { return true; });
OPERATOR_SCHEMA(PythonGradient).AllowInplace([](int, int) { return true; });
// Gradients of "Python" are produced by GetPythonGradient, which emits a
// "PythonGradient" operator.
REGISTER_GRADIENT(Python, GetPythonGradient);
}
}