Summary: This PR adds a way to infer the JIT/script schema of a function from its signature, and then create an operator from that schema and implementation. The implementation function is wrapped in another function that pops values from the stack into an argument tuple, invokes the function, and pushes the return value back onto the stack, unpacking the return value if it is a tuple. Currently the method is called `createOperator`. We may want to think of a nicer way of registering ops in tandem with `RegisterOperators`. It might be very cumbersome to add a template constructor to `Operator`, so maybe we can come up with a chaining method on `RegisterOperators` like `RegisterOperators(schema, func).op(schema, func).op(schema, func)` -- it has to work at startup time (for a static variable) though. We can solve this in another PR.

zdevito apaszke smessmer dzhulgakov

Pull Request resolved: https://github.com/pytorch/pytorch/pull/10048
Differential Revision: D9125975
Pulled By: goldsborough
fbshipit-source-id: de9e59888757573284a43787ae5d94384bfe8f9a
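As a rough illustration of the wrapping described above, here is a minimal, self-contained sketch (C++17, for std::apply) of the idea: pop typed arguments off a stack, invoke the wrapped function, and push the result back. The names Stack, Operation, popArgs, wrap, and addMul below are simplified stand-ins invented for this sketch, not the actual torch::jit types; the real createOperator operates on IValue stacks and additionally infers a FunctionSchema from the signature, as exercised by testCustomOperators in the file below.

// Illustrative sketch only (not the PyTorch implementation). A simplified
// "stack machine" whose stack holds doubles; wrap() turns an ordinary typed
// function into an operation that reads its arguments from the stack and
// leaves its result there, mirroring the mechanism described in the summary.
#include <functional>
#include <iostream>
#include <tuple>
#include <utility>
#include <vector>

using Stack = std::vector<double>;             // stand-in for the IValue stack
using Operation = std::function<void(Stack&)>;

// Pop the last sizeof...(Args) values into a tuple, preserving argument order.
template <typename... Args, std::size_t... Is>
std::tuple<Args...> popArgs(Stack& stack, std::index_sequence<Is...>) {
  const std::size_t base = stack.size() - sizeof...(Args);
  std::tuple<Args...> args{stack[base + Is]...};
  stack.resize(base);
  return args;
}

// Wrap a typed function into a stack-based operation.
template <typename Ret, typename... Args>
Operation wrap(Ret (*fn)(Args...)) {
  return [fn](Stack& stack) {
    auto args = popArgs<Args...>(stack, std::index_sequence_for<Args...>{});
    stack.push_back(std::apply(fn, args)); // invoke and push the result back
  };
}

double addMul(double a, double b) { return a * 2 + b; }

int main() {
  Stack stack{3.0, 4.0};        // the "interpreter" pushed the arguments
  Operation op = wrap(addMul);  // pops (3.0, 4.0), calls addMul, pushes 10.0
  op(stack);
  std::cout << stack.back() << "\n"; // prints 10
}

The real test below expresses the same idea through createOperator, push, pop, and Operation working on IValues.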
#ifdef USE_CATCH

#define CATCH_CONFIG_MAIN
#include "catch.hpp"

using Catch::StartsWith;

#else

#define REQUIRE JIT_ASSERT

#endif

#include "torch/csrc/jit/assertions.h"
#include "torch/csrc/jit/fusion_compiler.h"
#include "torch/csrc/jit/code_template.h"
#include "torch/csrc/jit/ir.h"
#include "torch/csrc/jit/attributes.h"
#include "torch/csrc/jit/interned_strings.h"
#include "torch/csrc/jit/interpreter.h"
#include "torch/csrc/jit/symbolic_variable.h"
#include "torch/csrc/jit/autodiff.h"
#include "torch/csrc/jit/tracer.h"
#include "torch/csrc/jit/passes/create_autodiff_subgraphs.h"
#include "torch/csrc/autograd/variable.h"
#include "torch/csrc/utils/hash.h"
#include "torch/csrc/jit/argument_spec.h"
#include "torch/csrc/jit/passes/shape_analysis.h"
#include "torch/csrc/jit/passes/dead_code_elimination.h"
#include "torch/csrc/jit/passes/lower_grad_of.h"
#include "torch/csrc/jit/operator.h"
#include "torch/csrc/jit/custom_operator.h"
#include "torch/csrc/variable_tensor_functions.h"

#include "torch/csrc/autograd/engine.h"

#include "torch/csrc/jit/graph_executor.h"
#include "torch/csrc/jit/script/compiler.h"
#include "torch/csrc/jit/script/module.h"
#include "torch/csrc/jit/ivalue.h"

#include "onnx/onnx_pb.h"

#include <ATen/ATen.h>

#include <algorithm>
#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <sstream> // std::stringstream / std::ostringstream used below
#include <stdexcept>
#include <string>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>
namespace torch { namespace jit {

using Var = SymbolicVariable;

using namespace torch::autograd;

template<typename T>
static std::ostream & operator<<(std::ostream & out, const std::vector<T> & list) {
  size_t i = 0;
  out << "{";
  for(auto && e : list) {
    if(i++ > 0)
      out << ", ";
    out << e;
  }
  out << "}";
  return out;
}
static auto ct = CodeTemplate(R"(
int foo($args) {

$bar
$bar
$a+$b
}
int commatest(int a${,stuff})
int notest(int a${,empty,})
)");

static auto ct_expect = R"(
int foo(hi, 8) {

what
on many
lines...
7
what
on many
lines...
7
3+4
}
int commatest(int a, things..., others)
int notest(int a)
)";

static void codeTemplateTest() {
  {
    TemplateEnv e;
    e.s("hi","foo");
    e.v("what",{"is","this"});
    TemplateEnv c(e);
    c.s("hi","foo2");
    REQUIRE(e.s("hi") == "foo");
    REQUIRE(c.s("hi") == "foo2");
    REQUIRE(e.v("what")[0] == "is");
  }

  {
    TemplateEnv e;
    e.v("args",{"hi","8"});
    e.v("bar",{"what\non many\nlines...","7"});
    e.s("a","3");
    e.s("b","4");
    e.v("stuff",{"things...","others"});
    e.v("empty",{});
    auto s = ct.format(e);
    //std::cout << "'" << s << "'\n";
    //std::cout << "'" << ct_expect << "'\n";
    REQUIRE(s == ct_expect);
  }
}
Value * appendNewNode(NodeKind kind, Graph& graph, ArrayRef<Value*> inputs) {
  return graph.appendNode(graph.create(kind,inputs))->output();
}

static void fusionTests() {
  FusionCompiler comp;

  auto testSimple = [&] {
    Graph graph;
    Var i0 = Var::asNewInput(graph);
    Var i1 = Var::asNewInput(graph);
    auto o0 = i0 * i1;
    o0.addAsOutput();
    auto a = at::rand({3,4}, at::kCUDA);
    auto b = at::rand({4,3}, at::kCUDA).transpose(0,1);
    auto o = at::zeros({3,4}, at::kCUDA);
    comp.debugLaunchGraph(graph, 0, {a,b}, {o});
    auto o2 = a*b;
    float max_diff = (o2 - o).abs().max().toCDouble();
    //std::cout << "max diff: " << max_diff << "\n";
    REQUIRE(max_diff == 0);
  };
  testSimple();

  auto testOne = [&](int ti, int tj, int toi, int toj) {
    Graph graph;

    Var i0 = Var::asNewInput(graph);
    Var i1 = Var::asNewInput(graph);
    Var i2 = Var::asNewInput(graph);
    Var i3 = Var::asNewInput(graph);
    Var i4 = Var::asNewInput(graph);

    auto p22 = i4.sigmoid();
    auto p20 = i3.sigmoid();
    auto p18 = i2.tanh();
    auto p16 = i1.sigmoid();
    auto p14 = p20 * i0;
    auto p11 = p22 * p18;
    auto o1 = p14 + p11;
    auto p5 = o1.tanh();
    auto o0 = p16 * p5;
    o0.addAsOutput();
    o1.addAsOutput();

    graph.lint();

    std::vector<at::Tensor> inputs;
    std::vector<at::Tensor> outputs;
    // We want to generate input/output tensors with dimension 128x128x32, but
    // with different internal strides. To do this, we generate a tensor
    // with the "wrong" dimensions, and then use transpose to get an
    // appropriately sized view.
    for(size_t i = 0; i < graph.inputs().size(); i++) {
      std::vector<int64_t> dims = {128, 128, 32};
      std::swap(dims[ti],dims[tj]);
      inputs.push_back(at::rand(dims, at::kCUDA).transpose(ti, tj));
    }
    for(size_t i = 0; i < graph.outputs().size(); i++) {
      std::vector<int64_t> dims = {128, 128, 32};
      std::swap(dims[toi],dims[toj]);
      outputs.push_back(at::zeros(dims, at::kCUDA).transpose(toi,toj));
    }

    auto t22 = inputs[4].sigmoid();
    auto t20 = inputs[3].sigmoid();
    auto t18 = inputs[2].tanh();
    auto t16 = inputs[1].sigmoid();
    auto t14 = t20*inputs[0];
    auto t11 = t22*t18;
    auto out1 = t14+t11;
    auto t5 = out1.tanh();
    auto out0 = t16*t5;

    //auto out0 = inputs[0]*inputs[1];
    comp.debugLaunchGraph(graph, 0, inputs, outputs);
    REQUIRE(out0.is_same_size(outputs.front()));
    float max_diff = (outputs.front() - out0).abs().max().toCDouble();
    REQUIRE(max_diff < 1e-6);
  };
  testOne(0,0,0,0);
  testOne(0,1,0,0);
  testOne(1,2,0,0);
  testOne(0,2,0,0);

  testOne(0,0,0,1);
  testOne(0,1,1,2);
  testOne(1,2,0,2);

  auto createFusedConcat = [](Graph & graph, at::ArrayRef<Value*> inputs, int64_t dim) -> Value* {
    return graph.insertNode(graph.create(prim::FusedConcat, inputs)->i_(attr::dim, dim))->output();
  };

  auto testConcat = [&](int dim) {
    Graph graph;
    Var i0 = Var::asNewInput(graph);
    Var i1 = Var::asNewInput(graph);
    auto o0 = i0 * i1;
    o0.addAsOutput();
    Var(createFusedConcat(graph, {i0, o0}, dim)).addAsOutput();

    auto a = at::rand({3,4,5}, at::kCUDA);
    auto b = at::rand({4,3,5}, at::kCUDA).transpose(0,1);
    auto o = at::zeros({3,4,5}, at::kCUDA);

    auto o_r = a*b;
    auto o2_r = at::cat({a, o_r}, dim);
    auto o2 = at::zeros(o2_r.sizes(), at::kCUDA);
    comp.debugLaunchGraph(graph, 0, {a,b}, {o, o2});

    float max_diff = (o_r - o).abs().max().toCDouble();
    REQUIRE(max_diff == 0);
    float max_diff2 = (o2_r - o2).abs().max().toCDouble();
    REQUIRE(max_diff2 == 0);
  };
  testConcat(0);
  testConcat(1);
  testConcat(2);
}
struct Attr : public Attributes<Attr> {
};

void attributesTest() {
  auto one = attr::alpha;
  auto two = attr::device;
  auto three = attr::end;
  auto four = attr::perm;
  Attr attr;
  attr.f_(one,3.4)->i_(two,5)->s_(three,"what");
  REQUIRE(attr.f(one) == 3.4);
  REQUIRE(attr.s(three) == "what");
  REQUIRE(attr.i(two) == 5);
  attr.s_(one,"no");
  REQUIRE(attr.s(one) == "no");
  REQUIRE(attr.hasAttribute(three));
  REQUIRE(!attr.hasAttribute(four));
  attr.ss_(two, {"hi", "now"});
  REQUIRE(attr.ss(two).at(1) == "now");

  Attr attr2;
  attr2.copyAttributes(attr);
  REQUIRE(attr2.s(one) == "no");
  attr2.f_(one,5);
  REQUIRE(attr.s(one) == "no");
  REQUIRE(attr2.f(one) == 5);
}
void internedStringsTests() {
  REQUIRE(prim::Param == Symbol::prim("Param"));
  REQUIRE(prim::Return == Symbol::prim("Return"));
  REQUIRE(prim::Return.toUnqualString() == std::string("Return"));
  REQUIRE(prim::Return.toQualString() == std::string("prim::Return"));
  Symbol newsym = Symbol::aten("__NEW_SYMBOL");
  size_t symstart = newsym;
  REQUIRE(newsym.toQualString() == std::string("aten::__NEW_SYMBOL"));
  // TODO: This test is a bit too close to the implementation details.
  REQUIRE(Symbol::aten("What") == symstart+1);
  REQUIRE(Symbol::aten("What2") == symstart+2);
  REQUIRE(Symbol::aten("What") == symstart+1);
  REQUIRE(Symbol::aten("What2") == symstart+2);
  REQUIRE(Symbol(symstart+2).toUnqualString() == std::string("What2"));
}

void fromQualStringTests() {
  REQUIRE(Symbol::fromQualString("prim::Param") == Symbol::prim("Param"));
  REQUIRE(Symbol::fromQualString("aten::mm") == Symbol::aten("mm"));
  REQUIRE(Symbol::fromQualString("onnx::LSTM") == Symbol::onnx("LSTM"));
  REQUIRE(Symbol::fromQualString("attr::value") == Symbol::attr("value"));
  REQUIRE(Symbol::fromQualString("scope::") == Symbol::scope(""));
  REQUIRE(Symbol::fromQualString("::").toUnqualString() == std::string(""));
  REQUIRE(Symbol::fromQualString("::").ns().toQualString() == std::string("namespaces::"));
  REQUIRE(Symbol::fromQualString("new_ns::param").toUnqualString() == std::string("param"));
  REQUIRE(Symbol::fromQualString("new_ns::param").ns().toUnqualString() == std::string("new_ns"));
  REQUIRE(Symbol::fromQualString("new_ns::param").ns() == Symbol::fromQualString("namespaces::new_ns"));

  auto bad_inputs = {"scope", ":", ""};
  for (auto input : bad_inputs) {
    try {
      Symbol::fromQualString(input);
      REQUIRE(0);
    } catch (const std::runtime_error&) {
      // expected: malformed qualified strings should throw
    }
  }
}
at::Tensor t_use(at::Tensor x) {
  return x;
}
at::Tensor t_def(at::Tensor x) {
  return x.t();
}

// Given the difference between the output and the expected tensor, check
// whether the difference is within a relative tolerance range. This is a
// standard way of matching tensor values up to a certain precision.
bool checkRtol(const at::Tensor& diff, const std::vector<at::Tensor> inputs) {
  double maxValue = 0.0;
  for (auto& tensor : inputs) {
    maxValue = fmax(tensor.abs().max().toCFloat(), maxValue);
  }
  return diff.abs().max().toCFloat() < 2e-6 * maxValue;
}
bool almostEqual(const at::Tensor & a, const at::Tensor & b) {
  return checkRtol(a - b, {a, b});
}

bool exactlyEqual(const at::Tensor & a, const at::Tensor & b) {
  return (a - b).abs().max().toCFloat() == 0.f;
}
std::pair<at::Tensor, at::Tensor>
lstm(at::Tensor input,
     at::Tensor hx,
     at::Tensor cx,
     at::Tensor w_ih,
     at::Tensor w_hh) {
  auto gates = input.mm(t_use(w_ih)) + hx.mm(t_use(w_hh));

  auto chunked_gates = gates.chunk(4, 1);
  auto ingate = chunked_gates[0];
  auto forgetgate = chunked_gates[1];
  auto cellgate = chunked_gates[2];
  auto outgate = chunked_gates[3];

  ingate = ingate.sigmoid();
  outgate = outgate.sigmoid();
  cellgate = cellgate.tanh();
  forgetgate = forgetgate.sigmoid();

  auto cy = (forgetgate * cx) + (ingate * cellgate);
  auto hy = outgate * cy.tanh();

  return {hy, cy};
}

std::tuple<Var, Var> build_lstm_body(
    Graph & g,
    Var input,
    Var hx,
    Var cx,
    Var w_ih,
    Var w_hh) {
  auto gates = input.mm(w_ih);
  gates = gates + hx.mm(w_hh);
  auto outputs = gates.chunk(4, 1);
  auto ingate = outputs[0];
  auto forgetgate = outputs[1];
  auto cellgate = outputs[2];
  auto outgate = outputs[3];
  ingate = ingate.sigmoid();
  outgate = outgate.sigmoid();
  cellgate = cellgate.tanh();
  forgetgate = forgetgate.sigmoid();

  auto cy = forgetgate*cx;
  cy = cy + ingate*cellgate;
  auto hy = outgate*cy.tanh();

  return std::make_tuple(hy, cy);
}

std::shared_ptr<Graph> build_lstm() {
  auto r = std::make_shared<Graph>();
  auto & g = *r;
  Value * input = g.addInput();
  Value * hx = g.addInput();
  Value * cx = g.addInput();
  Value * w_ih = g.addInput();
  Value * w_hh = g.addInput();

  Var hy;
  Var cy;
  std::tie(hy, cy) = build_lstm_body(g, input, hx, cx, w_ih, w_hh);

  hy.addAsOutput();
  cy.addAsOutput();
  g.lint();

  return r;
}
std::shared_ptr<Graph> build_lstm_stages() {
  auto r = std::make_shared<Graph>();
  auto & g = *r;
  Var input = g.addInput();
  Var hx = g.addInput();
  Var cx = g.addInput();
  Var w_ih = g.addInput();
  Var w_hh = g.addInput();

  Var hy;
  Var cy;
  std::tie(hy, cy) = build_lstm_body(g, input, hx, cx, w_ih, w_hh);

  // The second stage reuses values from the previous stage (hy becomes the
  // new hx, and the first-stage cy is returned as an output) as well as a
  // new input (cx).
  g.advanceStage();
  hx = hy;
  cy.addAsOutput();
  cx = g.addInput();

  std::tie(hy, cy) = build_lstm_body(g, input, hx, cx, w_ih, w_hh);

  hy.addAsOutput();
  cy.addAsOutput();
  g.lint();

  return r;
}
void runOneStage(InterpreterState & interp, const std::vector<at::Tensor> & inputs, std::vector<at::Tensor> & outputs) {
  std::vector<IValue> stack(inputs.begin(), inputs.end());
  interp.runOneStage(stack);
  outputs.clear();
  for(auto & ivalue : stack) {
    outputs.push_back(std::move(ivalue).toTensor());
  }
}
void interpTest() {
  constexpr int batch_size = 4;
  constexpr int input_size = 256;
  constexpr int seq_len = 32;

  int hidden_size = 2*input_size;

  auto input = at::randn({seq_len, batch_size, input_size}, at::kCUDA);
  auto hx = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto cx = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto w_ih = t_def(at::randn({4 * hidden_size, input_size}, at::kCUDA));
  auto w_hh = t_def(at::randn({4 * hidden_size, hidden_size}, at::kCUDA));

  auto lstm_g = build_lstm();
  Code lstm_function(lstm_g);
  std::vector<at::Tensor> outputs;
  InterpreterState lstm_interp(lstm_function);
  runOneStage(lstm_interp, {input[0], hx, cx, w_ih, w_hh}, outputs);
  std::tie(hx, cx) = lstm(input[0], hx, cx, w_ih, w_hh);

  //std::cout << almostEqual(outputs[0],hx) << "\n";
  REQUIRE(exactlyEqual(outputs[0], hx));
  REQUIRE(exactlyEqual(outputs[1], cx));
}

void interpStageTest() {
  constexpr int batch_size = 4;
  constexpr int input_size = 256;
  constexpr int seq_len = 32;

  int hidden_size = 2*input_size;
  auto input = at::randn({seq_len, batch_size, input_size}, at::kCUDA);
  auto hx = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto cx = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto cx1 = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto w_ih = t_def(at::randn({4 * hidden_size, input_size}, at::kCUDA));
  auto w_hh = t_def(at::randn({4 * hidden_size, hidden_size}, at::kCUDA));

  auto lstm_g = build_lstm_stages();
  Code lstm_function(lstm_g);
  std::vector<at::Tensor> outputs;
  InterpreterState lstm_interp(lstm_function);
  runOneStage(lstm_interp, {input[0], hx, cx, w_ih, w_hh}, outputs);
  auto cy0 = outputs[0];
  runOneStage(lstm_interp, {cx1}, outputs);
  at::Tensor ihx = outputs[0];
  at::Tensor icx = outputs[1];

  std::tie(hx, cx) = lstm(input[0], hx, cx, w_ih, w_hh);
  std::tie(hx, cx) = lstm(input[0], hx, cx1, w_ih, w_hh);

  //std::cout << almostEqual(outputs[0],hx) << "\n";
  REQUIRE(exactlyEqual(outputs[0], hx));
  REQUIRE(exactlyEqual(outputs[1], cx));
}
using var_meta_type = std::vector<int64_t>;
using var_meta_list = std::vector<var_meta_type>;
using test_fn_type = std::function<variable_list(const variable_list&)>;

struct ADTestSpec {
  ADTestSpec(const char *name, var_meta_list input_meta, test_fn_type test_fn)
    : name(name)
    , input_meta(input_meta)
    , test_fn(test_fn) {}

  variable_list operator()(const variable_list& inputs) const {
    return test_fn(inputs);
  }

  std::vector<Variable> make_vars() const {
    std::vector<Variable> out;
    for (const auto & m : input_meta) {
      out.emplace_back(autograd::make_variable(at::CPU(at::kFloat).tensor(m).normal_(), /*requires_grad=*/true));
    }
    return out;
  }

  const char *name;
  var_meta_list input_meta;
  test_fn_type test_fn;
};

variable_list get_grad_outputs(const variable_list& vars) {
  return fmap(vars, [](const Variable& v) -> Variable {
    return v.type().tensor(v.sizes()).normal_();
  });
}

std::shared_ptr<Graph> trace(const ADTestSpec& test, const variable_list& vars_in) {
  std::shared_ptr<tracer::TracingState> state;
  variable_list trace_vars_in;
  std::tie(state, trace_vars_in) = tracer::enter(vars_in);
  auto trace_vars_out = test(trace_vars_in);
  tracer::exit(trace_vars_out);
  return state->graph;
}

variable_list grad(const variable_list& outputs, const variable_list& inputs, const variable_list& grad_outputs) {
  static const auto get_edge = [](const Variable& v) { return v.gradient_edge(); };
  auto & engine = torch::autograd::Engine::get_default_engine();
  return engine.execute(fmap(outputs, get_edge), grad_outputs, true, false, fmap(inputs, get_edge));
}

void assertAllClose(const tensor_list& a, const tensor_list& b) {
  REQUIRE(a.size() == b.size());
  for (size_t i = 0; i < a.size(); ++i) {
    REQUIRE(a[i].is_same_size(b[i]));
    REQUIRE(a[i].allclose(b[i]));
  }
}
std::pair<tensor_list, tensor_list> runGradient(Gradient& grad_spec,
                                                tensor_list& tensors_in,
                                                tensor_list& tensor_grads_in) {
  tensor_list tensors_out, tensor_grads_out;
  Code f_code{grad_spec.f},
       df_code{grad_spec.df};
  InterpreterState f_interpreter { f_code }, df_interpreter { df_code };

  runOneStage(f_interpreter, tensors_in, tensors_out);

  tensor_list df_inputs;
  df_inputs.insert(df_inputs.end(), tensor_grads_in.begin(), tensor_grads_in.end());
  for(auto offset : grad_spec.df_input_captured_inputs)
    df_inputs.push_back(tensors_in[offset]);
  for(auto offset : grad_spec.df_input_captured_outputs)
    df_inputs.push_back(tensors_out[offset]);
  runOneStage(df_interpreter, df_inputs, tensor_grads_out);

  // The outputs of f need to be sliced down to the real outputs; the rest
  // were only produced so that df could capture them.
  tensors_out.erase(tensors_out.begin() + grad_spec.f_real_outputs, tensors_out.end());
  return std::make_pair(tensors_out, tensor_grads_out);
}
void testADFormulas() {
  static const auto unwrap = [](const Variable& v) { return v.data(); };

  using VL = variable_list;
  static const var_meta_list binary_pointwise = {{2, 3, 4, 5}, {2, 3, 4, 5}};
  static const var_meta_list unary_pointwise = {{2, 3, 4, 5}};
  static const var_meta_list unary_pointwise_2d = {{2, 3}};
  static const std::vector<ADTestSpec> ad_tests = {
    {"add",     binary_pointwise,   [](const VL& v) -> VL { return {v[0] + v[1]}; }},
    {"sub",     binary_pointwise,   [](const VL& v) -> VL { return {v[0] - v[1]}; }},
    {"mul",     binary_pointwise,   [](const VL& v) -> VL { return {v[0] * v[1]}; }},
    {"sigmoid", unary_pointwise,    [](const VL& v) -> VL { return {v[0].sigmoid()}; }},
    {"tanh",    unary_pointwise,    [](const VL& v) -> VL { return {v[0].tanh()}; }},
    {"t",       unary_pointwise_2d, [](const VL& v) -> VL { return {v[0].t()}; }},
    {"mm",      {{10, 12}, {12, 15}}, [](const VL& v) -> VL { return {v[0].mm(v[1])}; }},
    {"chunk",   {{10, 12, 15}},     [](const VL& v) -> VL { return fmap<Variable>(v[0].chunk(4, 1)); }},
    {"chunk",   {{10, 12, 15}},     [](const VL& v) -> VL { return fmap<Variable>(v[0].chunk(3, 2)); }},
    {"split",   {{10, 12, 15}},     [](const VL& v) -> VL { return fmap<Variable>(v[0].split(4, 1)); }},
    {"split",   {{10, 12, 15}},     [](const VL& v) -> VL { return fmap<Variable>(v[0].split(3, 2)); }},
  };

  for (const auto & test : ad_tests) {
    // Get reference values from autograd
    auto vars_in = test.make_vars();
    auto vars_out = test(vars_in);
    auto var_grads_in = get_grad_outputs(vars_out);
    auto var_grads_out = grad(vars_out, vars_in, var_grads_in);

    // Trace and differentiate the op
    auto graph = trace(test, vars_in);
    EliminateDeadCode(graph); // Tracing of some ops depends on the DCE trick
    auto grad_spec = differentiate(graph, std::vector<bool>(vars_in.size(), true));
    LowerGradOf(*grad_spec.df);
    // Get outputs from the interpreter
    auto tensors_in = fmap(vars_in, unwrap);
    auto tensor_grads_in = fmap(var_grads_in, unwrap);
    tensor_list tensors_out, tensor_grads_out;
    std::tie(tensors_out, tensor_grads_out) = runGradient(grad_spec, tensors_in, tensor_grads_in);

    // Compare results
    auto expected_tensors_out = fmap(vars_out, unwrap);
    auto expected_tensor_grads_out = fmap(var_grads_out, unwrap);
    assertAllClose(tensors_out, expected_tensors_out);
    assertAllClose(tensor_grads_out, expected_tensor_grads_out);
  }
}
std::string toString(std::shared_ptr<Graph>& graph) {
  std::ostringstream s;
  s << *graph;
  return s.str();
}
void testDifferentiate(std::ostream & out) {
  auto graph = std::make_shared<Graph>();
  at::ScalarType s = at::ScalarType::Float;
  auto type = TensorType::create(s, -1, {2, 3, 4}, {12, 4, 1});

  // Build up a fake graph
  auto a = SymbolicVariable::asNewInput(*graph, type);
  auto b = SymbolicVariable::asNewInput(*graph, type);
  auto c = a * b * a + b;
  graph->registerOutput(c.value());

  auto grad_spec = differentiate(graph, {true, true});
  std::vector<size_t> expected_captured_inputs = {0, 1};
  std::vector<size_t> expected_captured_outputs = {1};
  std::vector<size_t> expected_input_vjps = {0, 1};
  std::vector<size_t> expected_output_vjps = {0, 1};
  REQUIRE(grad_spec.f_real_outputs == 1);
  REQUIRE(grad_spec.df_input_captured_inputs == expected_captured_inputs);
  REQUIRE(grad_spec.df_input_captured_outputs == expected_captured_outputs);
  REQUIRE(grad_spec.df_input_vjps == expected_input_vjps);
  REQUIRE(grad_spec.df_output_vjps == expected_output_vjps);
  out << "testDifferentiate\n";
  out << *grad_spec.f;
  out << *grad_spec.df;
  out << "\n";
}

void testDifferentiateWithRequiresGrad(std::ostream & out) {
  auto graph = std::make_shared<Graph>();
  at::ScalarType s = at::ScalarType::Float;
  auto type = TensorType::create(s, -1, {2, 3, 4}, {12, 4, 1});

  // Build up a fake graph
  auto a = SymbolicVariable::asNewInput(*graph, type);
  auto b = SymbolicVariable::asNewInput(*graph, type);
  auto d = b * b + b;
  auto e = (d + a) * a + b;
  graph->registerOutput(d.value());
  graph->registerOutput(e.value());

  auto grad_spec = differentiate(graph, {true, false});
  std::vector<size_t> expected_input_vjps = {1, 2}; // for e and %4 = (d + a)
  std::vector<size_t> expected_output_vjps = {0};   // only a requires grad
  REQUIRE(grad_spec.f_real_outputs == 2);           // we need one temporary %4 = (d + a)
  REQUIRE(grad_spec.df_input_captured_inputs == std::vector<size_t>({0}));
  REQUIRE(grad_spec.df_input_captured_outputs == std::vector<size_t>({2}));
  REQUIRE(grad_spec.df_input_vjps == expected_input_vjps);
  REQUIRE(grad_spec.df_output_vjps == expected_output_vjps);
  out << "testDifferentiateWithRequiresGrad\n";
  out << *grad_spec.f;
  out << *grad_spec.df;
  out << "\n";
}
void testCreateAutodiffSubgraphs(std::ostream & out) {
  auto graph = build_lstm();
  CreateAutodiffSubgraphs(*graph, /*threshold=*/2);
  out << "testCreateAutodiffSubgraphs\n";
  out << *graph << "\n";
}
autograd::Variable var(at::Type & t, at::IntList sizes, bool requires_grad) {
  return autograd::make_variable(at::rand(sizes, t), requires_grad);
}
autograd::Variable undef() {
  return autograd::Variable();
}

int device(const autograd::Variable & v) {
  return v.type().is_cuda() ? v.get_device() : -1;
}

bool isEqual(at::IntList lhs, at::IntList rhs) {
  return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
}

bool isEqual(const ArgumentInfo & ti, const autograd::Variable & v) {
  REQUIRE(ti.isTensor());
  if(!ti.defined())
    return ti.defined() == v.defined();
  return
    ti.device() == device(v) &&
    ti.requires_grad() == v.requires_grad() &&
    ti.type() == v.type().scalarType() &&
    isEqual(ti.sizes(), v.sizes()) &&
    isEqual(ti.strides(), v.strides());
}

// Work around the fact that variable_tensor_list doesn't duplicate all
// of std::vector's constructors. Most constructors are never used in the
// implementation, just in our tests.
Stack createStack(std::vector<at::Tensor> && list) {
  return Stack(std::make_move_iterator(list.begin()), std::make_move_iterator(list.end()));
}
void argumentSpecTest() {
  auto & CF = at::CPU(at::kFloat);
  auto & CD = at::CPU(at::kDouble);
  auto & GF = at::CUDA(at::kFloat);
  auto & GD = at::CUDA(at::kDouble);

  auto list = createStack({var(CF, {1}, true), var(CD, {1, 2}, false), var(GF, {}, true), var(GD, {4,5,6}, false), undef()});

  // make sure we have some non-standard strides
  list[1].toTensor().transpose_(0, 1);

  // same list but different backing values
  auto list2 = createStack({var(CF, {1}, true), var(CD, {1, 2}, false), var(GF, {}, true), var(GD, {4,5,6}, false), undef()});
  list2[1].toTensor().transpose_(0, 1);

  ArgumentSpec a(true, list);
  ArgumentSpec b(true, list);
  REQUIRE(a.hashCode() == b.hashCode());

  REQUIRE(a == b);
  ArgumentSpec d(true, list2);
  REQUIRE(d == a);
  REQUIRE(d.hashCode() == a.hashCode());

  for(size_t i = 0; i < list.size(); ++i) {
    REQUIRE(isEqual(a.at(i), list[i].toTensor()));
  }
  ArgumentSpec no_grad(/*with_grad=*/false, list);
  REQUIRE(no_grad != a);

  std::unordered_set<ArgumentSpec> spec;
  spec.insert(std::move(a));
  REQUIRE(spec.count(b) > 0);
  REQUIRE(spec.count(no_grad) == 0);
  spec.insert(std::move(no_grad));
  REQUIRE(spec.count(ArgumentSpec(true, list)) == 1);

  list2[1].toTensor().transpose_(0, 1);
  ArgumentSpec c(true, list2); // same as list, except for one stride
  REQUIRE(!(c == a));
  REQUIRE(spec.count(c) == 0);

  Stack stack = { var(CF, {1,2}, true), 3, var(CF, {1,2}, true) };
  ArgumentSpec with_const(true, stack);
  REQUIRE(with_const.at(2).sizes().size() == 2);
}
void shapeAnalysisTest() {
  constexpr int batch_size = 4;
  constexpr int input_size = 256;

  int hidden_size = 2*input_size;

  auto v = [](at::Tensor t) { return autograd::make_variable(t, false); };

  auto input = at::randn({batch_size, input_size}, at::kCUDA);
  auto hx = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto cx = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto w_ih = t_def(at::randn({4 * hidden_size, input_size}, at::kCUDA));
  auto w_hh = t_def(at::randn({4 * hidden_size, hidden_size}, at::kCUDA));

  auto g = build_lstm();
  ArgumentSpec spec(false, createStack({v(input), v(hx), v(cx), v(w_ih), v(w_hh)}));
  PropagateInputShapes(*g, spec);
  at::Tensor r0, r1;
  std::tie(r0, r1) = lstm(input, hx, cx, w_ih, w_hh);
  auto o0 = g->outputs()[0]->type()->expect<TensorType>();
  auto o1 = g->outputs()[1]->type()->expect<TensorType>();
  REQUIRE(o0->sizes() == std::vector<int64_t>(r0.sizes().begin(), r0.sizes().end()));
  REQUIRE(o1->sizes() == std::vector<int64_t>(r1.sizes().begin(), r1.sizes().end()));
}
void testGraphExecutor() {
  constexpr int batch_size = 4;
  constexpr int input_size = 256;

  int hidden_size = 2*input_size;

  auto v = [](at::Tensor t) { return autograd::make_variable(t, false); };

  auto input = at::randn({batch_size, input_size}, at::kCUDA);
  auto hx = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto cx = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto w_ih = t_def(at::randn({4 * hidden_size, input_size}, at::kCUDA));
  auto w_hh = t_def(at::randn({4 * hidden_size, hidden_size}, at::kCUDA));

  auto g = build_lstm();
  GraphExecutor executor(g);
  auto stack = createStack({v(input), v(hx), v(cx), v(w_ih), v(w_hh)});
  executor.run(stack);
  REQUIRE(stack.size() == 2);
  at::Tensor r0, r1;
  std::tie(r0, r1) = lstm(input, hx, cx, w_ih, w_hh);
  REQUIRE(almostEqual(Variable(stack[0].toTensor()).data(), r0));
  REQUIRE(almostEqual(Variable(stack[1].toTensor()).data(), r1));
}
void testBlocks(std::ostream & out) {
  Graph g;
  auto a = Var::asNewInput(g, "a");
  auto b = Var::asNewInput(g, "b");
  auto c = a + b;
  auto r = g.appendNode(g.create(prim::If, {Var::asNewInput(g, "c").value()}));
  auto then_block = r->addBlock();
  auto else_block = r->addBlock();
  {
    WithInsertPoint guard(then_block);
    auto t = c + c;
    then_block->registerOutput(t.value());
  }
  {
    WithInsertPoint guard(else_block);
    auto d = b + c;
    auto e = d + c;
    else_block->registerOutput(e.value());
  }
  g.registerOutput((Var(r->output()) + c).value());
  g.lint();
  out << "testBlocks\n" << g << "\n";
  r->eraseBlock(0);
  out << g << "\n";
  g.lint();
  // test that recursive copy of blocks works
  auto g2 = g.copy();
  out << *g2 << "\n";
}
const static auto cf_examples = R"JIT(
  def if_test(a, b):
    # FIXME: use 0 instead of a.
    # c = 0
    c = a
    if a < b:
      c = b
    else:
      c = a
    return c
  def if_one(a, b):
    c = b
    if a < b:
      c = a
    return c
  def while_test(a, i):
    while i < 3:
      a *= a
      i += 1
    return a
)JIT";
void testControlFlow() {
  script::Module cu;
  script::defineMethodsInModule(cu, cf_examples, torch::jit::script::Resolver(), nullptr);
  auto run = [&](const std::string & name, std::vector<IValue> stack) {
    auto graph = cu.get_method(name).graph();
    Code code(graph);
    InterpreterState interp(code);
    interp.runOneStage(stack);
    return stack;
  };

  auto L = [](int64_t l) { return IValue(autograd::make_variable(at::Scalar(l).toTensor())); };
  auto V = [](IValue t) { return at::Scalar(std::move(t).toTensor()).toLong(); };
  auto run_binary = [&](const std::string & name, int64_t a, int64_t b) {
    return V(run(name, {L(a), L(b)})[0]);
  };
  REQUIRE(2 == run_binary("if_test", 1, 2));
  REQUIRE(3 == run_binary("if_test", 3, 2));
  REQUIRE(2 == run_binary("if_one", 2, 3));
  REQUIRE(2 == run_binary("if_one", 3, 2));
  REQUIRE(256 == run_binary("while_test", 2, 0));
}
void testIValue() {
  Shared<IntList> foo = IntList::create({3, 4, 5});
  JIT_ASSERT(foo->use_count() == 1);
  IValue bar(foo);
  JIT_ASSERT(foo->use_count() == 2);
  auto baz = bar;
  JIT_ASSERT(foo->use_count() == 3);
  auto foo2 = std::move(bar);
  JIT_ASSERT(foo->use_count() == 3);
  JIT_ASSERT(foo2.isIntList());
  JIT_ASSERT(bar.isNone());
  foo2 = IValue(4.0);
  JIT_ASSERT(foo2.isDouble());
  JIT_ASSERT(foo2.toDouble() == 4.0);
  JIT_ASSERT(foo->use_count() == 2);
  JIT_ASSERT(ArrayRef<int64_t>(baz.toIntList()->elements()).equals({3,4,5}));

  auto move_it = std::move(baz).toIntList();
  JIT_ASSERT(foo->use_count() == 2);
  JIT_ASSERT(baz.isNone());
  IValue i(4);
  JIT_ASSERT(i.isInt() && i.toInt() == 4);
  IValue dlist(DoubleList::create({3.5}));
  JIT_ASSERT(
      dlist.isDoubleList() &&
      ArrayRef<double>(std::move(dlist).toDoubleList()->elements())
          .equals({3.5}));
  JIT_ASSERT(dlist.isNone());
  dlist = IValue(DoubleList::create({3.4}));
  JIT_ASSERT(ArrayRef<double>(dlist.toDoubleList()->elements()).equals({3.4}));
  IValue the_list(Tuple::create({IValue(3.4), IValue(4), IValue(foo)}));
  JIT_ASSERT(foo->use_count() == 3);
  JIT_ASSERT(the_list.isTuple());
  auto first = std::move(the_list).toTuple()->elements().at(1);
  JIT_ASSERT(first.toInt() == 4);
  at::Tensor tv = at::rand({3,4});
  IValue ten(tv);
  JIT_ASSERT(tv.get()->use_count() == 2);
  auto ten2 = ten;
  JIT_ASSERT(tv.get()->use_count() == 3);
  JIT_ASSERT(ten2.toTensor().equal(ten.toTensor()));
  std::move(ten2).toTensor();
  JIT_ASSERT(tv.get()->use_count() == 2);
}
void testProto() {
  ::ONNX_NAMESPACE::ModelProto proto;
  proto.set_producer_name("foo");
}
void testCustomOperators() {
  {
    RegisterOperators reg({createOperator(
        "foo::bar", [](double a, at::Tensor b) { return a + b; })});
    auto& ops = getAllOperatorsFor(Symbol::fromQualString("foo::bar"));
    REQUIRE(ops.size() == 1);

    auto& op = ops.front();
    REQUIRE(op->schema().name == "foo::bar");

    REQUIRE(op->schema().arguments.size() == 2);
    REQUIRE(op->schema().arguments[0].name == "_0");
    REQUIRE(op->schema().arguments[0].type->kind() == TypeKind::FloatType);
    REQUIRE(op->schema().arguments[1].name == "_1");
    REQUIRE(op->schema().arguments[1].type->kind() == TypeKind::DynamicType);

    REQUIRE(op->schema().returns.size() == 1);
    REQUIRE(op->schema().returns[0].type->kind() == TypeKind::DynamicType);

    Stack stack;
    push(stack, 2.0f, at::ones(5));
    op->getOperation()(stack);
    at::Tensor output;
    pop(stack, output);

    REQUIRE(output.allclose(at::full(5, 3.0f)));
  }
  {
    RegisterOperators reg({createOperator(
        "foo::bar_with_schema(float a, Tensor b) -> Tensor",
        [](double a, at::Tensor b) { return a + b; })});

    auto& ops =
        getAllOperatorsFor(Symbol::fromQualString("foo::bar_with_schema"));
    REQUIRE(ops.size() == 1);

    auto& op = ops.front();
    REQUIRE(op->schema().name == "foo::bar_with_schema");

    REQUIRE(op->schema().arguments.size() == 2);
    REQUIRE(op->schema().arguments[0].name == "a");
    REQUIRE(op->schema().arguments[0].type->kind() == TypeKind::FloatType);
    REQUIRE(op->schema().arguments[1].name == "b");
    REQUIRE(op->schema().arguments[1].type->kind() == TypeKind::DynamicType);

    REQUIRE(op->schema().returns.size() == 1);
    REQUIRE(op->schema().returns[0].type->kind() == TypeKind::DynamicType);

    Stack stack;
    push(stack, 2.0f, at::ones(5));
    op->getOperation()(stack);
    at::Tensor output;
    pop(stack, output);

    REQUIRE(output.allclose(at::full(5, 3.0f)));
  }
  {
    // Check that lists work well.
    RegisterOperators reg({createOperator(
        "foo::lists(int[] ints, float[] floats, Tensor[] tensors) -> float[]",
        [](const std::vector<int64_t>& ints,
           const std::vector<double>& floats,
           std::vector<at::Tensor> tensors) { return floats; })});

    auto& ops =
        getAllOperatorsFor(Symbol::fromQualString("foo::lists"));
    REQUIRE(ops.size() == 1);

    auto& op = ops.front();
    REQUIRE(op->schema().name == "foo::lists");

    REQUIRE(op->schema().arguments.size() == 3);
    REQUIRE(op->schema().arguments[0].name == "ints");
    REQUIRE(op->schema().arguments[0].type->isSubtypeOf(ListType::ofInts()));
    REQUIRE(op->schema().arguments[1].name == "floats");
    REQUIRE(op->schema().arguments[1].type->isSubtypeOf(ListType::ofFloats()));
    REQUIRE(op->schema().arguments[2].name == "tensors");
    REQUIRE(op->schema().arguments[2].type->isSubtypeOf(ListType::ofTensors()));

    REQUIRE(op->schema().returns.size() == 1);
    REQUIRE(op->schema().returns[0].type->isSubtypeOf(ListType::ofFloats()));

    Stack stack;
    push(stack, std::vector<int64_t>{1, 2});
    push(stack, std::vector<double>{1.0, 2.0});
    push(stack, std::vector<at::Tensor>{at::ones(5)});
    op->getOperation()(stack);
    std::vector<double> output;
    pop(stack, output);

    REQUIRE(output.size() == 2);
    REQUIRE(output[0] == 1.0);
    REQUIRE(output[1] == 2.0);
  }
  {
#ifdef USE_CATCH
    REQUIRE_THROWS_WITH(
        createOperator(
            "foo::bar_with_bad_schema(Tensor a) -> Tensor",
            [](double a, at::Tensor b) { return a + b; }),
        StartsWith("Inferred 2 argument(s) for operator implementation, "
                   "but the provided schema specified 1 argument(s)."));
    REQUIRE_THROWS_WITH(
        createOperator(
            "foo::bar_with_bad_schema(Tensor a) -> Tensor",
            [](double a) { return a; }),
        StartsWith("Inferred type for argument #0 was float, "
                   "but the provided schema specified type Dynamic "
                   "for the argument in that position"));
    REQUIRE_THROWS_WITH(
        createOperator(
            "foo::bar_with_bad_schema(float a) -> (float, float)",
            [](double a) { return a; }),
        StartsWith("Inferred 1 return value(s) for operator implementation, "
                   "but the provided schema specified 2 return value(s)."));
    REQUIRE_THROWS_WITH(
        createOperator(
            "foo::bar_with_bad_schema(float a) -> Tensor",
            [](double a) { return a; }),
        StartsWith("Inferred type for return value #0 was float, "
                   "but the provided schema specified type Dynamic "
                   "for the return value in that position"));
#endif // USE_CATCH
  }
}
TORCH_API std::string runJITCPPTests() {
  std::stringstream out;
  testIValue();
  testControlFlow();
  testGraphExecutor();
  testBlocks(out);
  testCreateAutodiffSubgraphs(out);
  testDifferentiate(out);
  testDifferentiateWithRequiresGrad(out);
  testADFormulas();
  interpTest();
  interpStageTest();
  codeTemplateTest();
  fusionTests();
  attributesTest();
  internedStringsTests();
  fromQualStringTests();
  argumentSpecTest();
  shapeAnalysisTest();
  testProto();
  testCustomOperators();
  return out.str();
}
#ifdef USE_CATCH

TEST_CASE( "jit test CPU", "[cpu]" ) {

  std::stringstream out;
  SECTION( "control flow" )
    testControlFlow();
  SECTION( "blocks" )
    testBlocks(out);
  SECTION( "create autodiff subgraphs" )
    testCreateAutodiffSubgraphs(out);
  SECTION( "differentiate" )
    testDifferentiate(out);
  SECTION( "differentiate with requires grad" )
    testDifferentiateWithRequiresGrad(out);
  SECTION( "AD formulas" )
    testADFormulas();
  SECTION( "code template" )
    codeTemplateTest();
  SECTION( "attributes" )
    attributesTest();
  SECTION( "interned strings" )
    internedStringsTests();
  SECTION( "custom operators" )
    testCustomOperators();
}

TEST_CASE( "jit test CUDA", "[cuda]" ) {

  SECTION( "graph executor" )
    testGraphExecutor();
  SECTION( "fusion" )
    fusionTests();
  SECTION( "interp" )
    interpTest();
  SECTION( "interp stage" )
    interpStageTest();
  SECTION( "argument spec" )
    argumentSpecTest();
  SECTION( "shape analysis" )
    shapeAnalysisTest();
}

#endif

}} // namespace torch::jit