pytorch/test/cpp/api/rnn.cpp
Peter Goldsborough 271406f276
[C++ API] Make pImpl easy to use in modules to enable happy reference semantics (#8347)
* Created TORCH_MODULE macro

Rewrote Linear

Rewrote Dropout and added default constructor to TORCH_MODULE macro

Turned TORCH_MODULE contents into a proper base class

Added some documentation

Got rid of the old Dropout module

Got rid of the old Embedding module

Got rid of the old BatchNorm module

Got rid of the old Conv module

Fixing optimizers

Rebase

Removed old RNN modules and the TORCH_ATTR macro

Removed temporary P:: namespace

Added cloning behavior to all modules

Got rid of some get() calls

self review nits

Remove noexcept from ModuleHolder methods that can throw

Remove spaces

Add missing override to reset() methods

Added examples to documentation in pimpl.h

* Post rebase fixes
2018-06-18 19:45:53 -07:00
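
For context on the change above: TORCH_MODULE generates a user-facing handle class around a shared_ptr to an *Impl module, which is what gives the Linear/LSTM/GRU/RNN types used in this test their reference semantics and operator-> access. Below is a hedged, illustrative sketch of that pattern; XorNetImpl and XorNet are made-up names, the snippet is written against the later form of the C++ frontend (so forward() takes a single Tensor rather than the vector of Variables used in the test), and the authoritative definitions of ModuleHolder and TORCH_MODULE live in torch/nn/pimpl.h. The test file itself begins below with its includes.

#include <torch/torch.h>

// Hypothetical user-defined module, not part of this test file.
struct XorNetImpl : torch::nn::Module {
  explicit XorNetImpl(int64_t features)
      : linear(register_module("linear", torch::nn::Linear(features, 1))) {}

  torch::Tensor forward(torch::Tensor x) {
    return linear->forward(x);
  }

  torch::nn::Linear linear;
};

// TORCH_MODULE generates a `XorNet` class that wraps a
// std::shared_ptr<XorNetImpl>: copies of a XorNet share the same underlying
// parameters (reference semantics), and members are reached through
// operator->, e.g. net->forward(x), net->cuda().
TORCH_MODULE(XorNet);

// Usage: XorNet net(32); auto y = net->forward(torch::ones({16, 32}));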


#include <catch.hpp>

#include <torch/torch.h>

#include <test/cpp/api/util.h>

using namespace torch;
using namespace torch::nn;
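
// Shared harness for the integration tests below: build Linear -> RNN-variant
// -> Linear, train it with Adam to regress the per-sequence sum of random 0/1
// inputs, and report whether the smoothed MSE drops below 1e-2 within
// max_epoch iterations.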
template <typename R, typename Func>
bool test_RNN_xor(Func&& model_maker, bool cuda = false) {
  auto nhid = 32;
  auto model = std::make_shared<SimpleContainer>();
  auto l1 = model->add(Linear(1, nhid), "l1");
  auto rnn = model->add(model_maker(nhid), "rnn");
  auto lo = model->add(Linear(nhid, 1), "lo");

  auto optim = Adam(model, 1e-2).make();

  auto forward_op = [&](Variable x) {
    auto T = x.size(0);
    auto B = x.size(1);
    x = x.view({T * B, 1});
    x = l1->forward({x})[0].view({T, B, nhid}).tanh_();
    x = rnn->forward({x})[0][T - 1];
    x = lo->forward({x})[0];
    return x;
  };

  if (cuda) {
    model->cuda();
  }

  float running_loss = 1;
  int epoch = 0;
  auto max_epoch = 1500;
  while (running_loss > 1e-2) {
    auto bs = 16U;
    auto nlen = 5U;
    const auto backend = cuda ? at::kCUDA : at::kCPU;
    auto inp = at::rand({nlen, bs, 1}, backend).round().toType(at::kFloat);
    auto lab = inp.sum(0);

    auto x = autograd::make_variable(inp, /*requires_grad=*/true);
    auto y = autograd::make_variable(lab);
    x = forward_op(x);
    Variable loss = at::mse_loss(x, y);

    optim->zero_grad();
    loss.backward();
    optim->step();

    running_loss = running_loss * 0.99 + loss.toCFloat() * 0.01;
    if (epoch > max_epoch) {
      return false;
    }
    epoch++;
  }
  return true;
}

void check_lstm_sizes(std::vector<Variable> tup) {
  // Expect the LSTM to have 64 outputs and 3 layers, with an input of
  // 10 time steps and batch size 16 (10 x 16 x n)
  auto out = tup[0];
  auto hids = tup[1];

  REQUIRE(out.ndimension() == 3);
  REQUIRE(out.size(0) == 10);
  REQUIRE(out.size(1) == 16);
  REQUIRE(out.size(2) == 64);

  REQUIRE(hids.ndimension() == 4);
  REQUIRE(hids.size(0) == 2); // (hx, cx)
  REQUIRE(hids.size(1) == 3); // layers
  REQUIRE(hids.size(2) == 16); // batch size
  REQUIRE(hids.size(3) == 64); // 64 hidden dims

  // Something is in the hiddens
  REQUIRE(hids.norm().toCFloat() > 0);
}
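
// Forward-shape and reference-value checks for the LSTM module. The "sizes"
// section also feeds the returned hidden state back into forward() and
// expects it to change; the "outputs" section fills parameters and inputs
// with deterministic values so the results can be compared against reference
// numbers from the Python API.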
TEST_CASE("rnn") {
SECTION("lstm") {
SECTION("sizes") {
LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
auto x = torch::randn({10, 16, 128}, at::requires_grad());
auto tup = model->forward({x});
auto y = x.mean();
y.backward();
check_lstm_sizes(tup);
auto next = model->forward({x, tup[1]});
check_lstm_sizes(next);
Variable diff = next[1] - tup[1];
// Hiddens changed
REQUIRE(diff.data().abs().sum().toCFloat() > 1e-3);
}
SECTION("outputs") {
// Make sure the outputs match pytorch outputs
LSTM model(2, 2);
for (auto& v : model->parameters()) {
float size = v->numel();
auto p = static_cast<float*>(v->data().storage()->data());
for (size_t i = 0; i < size; i++) {
p[i] = i / size;
}
}
auto x = torch::empty({3, 4, 2}, at::requires_grad());
float size = x.data().numel();
auto p = static_cast<float*>(x.data().storage()->data());
for (size_t i = 0; i < size; i++) {
p[i] = (size - i) / size;
}
auto out = model->forward({x});
REQUIRE(out[0].ndimension() == 3);
REQUIRE(out[0].size(0) == 3);
REQUIRE(out[0].size(1) == 4);
REQUIRE(out[0].size(2) == 2);
auto flat = out[0].data().view(3 * 4 * 2);
float c_out[] = {0.4391, 0.5402, 0.4330, 0.5324, 0.4261, 0.5239,
0.4183, 0.5147, 0.6822, 0.8064, 0.6726, 0.7968,
0.6620, 0.7860, 0.6501, 0.7741, 0.7889, 0.9003,
0.7769, 0.8905, 0.7635, 0.8794, 0.7484, 0.8666};
for (size_t i = 0; i < 3 * 4 * 2; i++) {
REQUIRE(std::abs(flat[i].toCFloat() - c_out[i]) < 1e-3);
}
REQUIRE(out[1].ndimension() == 4); // (hx, cx) x layers x B x 2
REQUIRE(out[1].size(0) == 2);
REQUIRE(out[1].size(1) == 1);
REQUIRE(out[1].size(2) == 4);
REQUIRE(out[1].size(3) == 2);
flat = out[1].data().view(16);
float h_out[] = {0.7889,
0.9003,
0.7769,
0.8905,
0.7635,
0.8794,
0.7484,
0.8666,
1.1647,
1.6106,
1.1425,
1.5726,
1.1187,
1.5329,
1.0931,
1.4911};
for (size_t i = 0; i < 16; i++) {
REQUIRE(std::abs(flat[i].toCFloat() - h_out[i]) < 1e-3);
}
}
}
}
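
// End-to-end training checks: each RNN variant must learn the sequence-sum
// task defined by test_RNN_xor above.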
TEST_CASE("rnn/integration/LSTM") {
REQUIRE(test_RNN_xor<LSTM>(
[](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }));
}
TEST_CASE("rnn/integration/GRU") {
REQUIRE(test_RNN_xor<GRU>(
[](int s) { return GRU(GRUOptions(s, s).layers(2)); }));
}
TEST_CASE("rnn/integration/RNN") {
SECTION("relu") {
REQUIRE(test_RNN_xor<RNN>(
[](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }));
}
SECTION("tanh") {
REQUIRE(test_RNN_xor<RNN>(
[](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }));
}
}
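
// The same size and training checks, repeated with the model and inputs on
// CUDA; the "[cuda]" tag marks these tests as GPU-only.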
TEST_CASE("rnn_cuda", "[cuda]") {
SECTION("sizes") {
LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
model->cuda();
auto x = torch::randn({10, 16, 128}, at::requires_grad().device(at::kCUDA));
auto tup = model->forward({x});
auto y = x.mean();
y.backward();
check_lstm_sizes(tup);
auto next = model->forward({x, tup[1]});
check_lstm_sizes(next);
Variable diff = next[1] - tup[1];
// Hiddens changed
REQUIRE(diff.data().abs().sum().toCFloat() > 1e-3);
};
SECTION("lstm") {
REQUIRE(test_RNN_xor<LSTM>(
[](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }, true));
}
SECTION("gru") {
REQUIRE(test_RNN_xor<GRU>(
[](int s) { return GRU(GRUOptions(s, s).layers(2)); }, true));
}
SECTION("rnn") {
SECTION("relu") {
REQUIRE(test_RNN_xor<RNN>(
[](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); },
true));
}
SECTION("tanh") {
REQUIRE(test_RNN_xor<RNN>(
[](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); },
true));
}
}
}