pytorch/test/cpp/api/modules.cpp
Peter Goldsborough ab0c72ab6f Replace cursors with OrderedDict (#13427)
Summary:
This is a pre-cursor diff to Python <-> C++ frontend integration -- I have a follow-up PR coming for that. This PR changes the C++ frontend module interface to replace the custom "cursor"s I introduced some time ago with `OrderedDict`. I introduced cursors at the time as a convenient way of applying functions and query operations on a modules' parameters, buffers and modules, allowing things like `module.parameters().map(my_func)`. However, I noticed that (1) this functionality is easily implementable on top of a regular data structure and (2) more importantly,  using OrderedDicts is much, much easier for Python integration. This is especially true given that ScriptModule today also uses OrderedDict. Since C++ frontend modules and ScriptModules will soon too share as many implementation details as possible, it is overall the best move to ditch the custom cursor data structure and pervasively use OrderedDict everywhere.

For this I did:

1. Changed the C++ frontend module interface to more closely match the Python one by providing `parameters()`, `named_parameters()` and other methods Python provides. This is very important for the following diff which binds these into Python for inter-op with Python modules.
2. In lieu of the `Cursor::apply()` method I added `nn::Module::apply`. This again is one more unifying step between Python and C++, since Python modules have an apply function too.
3. Deleted all uses of Cursor.
4. Tidied and beefed up the `OrderedDict` class. In particular, I made `OrderedDict::Item` store an `std::pair` under the hood, because that is trivial to bind into Python and saved me a lot of headaches. `key` and `value` become methods instead of fields, which they should have been from the very start anyway because it allows exactly these kinds of changes, as per usual good software engineering principle of encapsulation.
5. Added many tests for the OrderedDict use in `nn::Module`.

ebetica ezyang
Pull Request resolved: https://github.com/pytorch/pytorch/pull/13427

Differential Revision: D12894092

Pulled By: goldsborough

fbshipit-source-id: 715770c95a9643753a1db26d7f9da9a78619a15d
2018-11-07 11:10:05 -08:00

328 lines
9.3 KiB
C++

#include <gtest/gtest.h>
#include <torch/nn/module.h>
#include <torch/nn/modules/batchnorm.h>
#include <torch/nn/modules/conv.h>
#include <torch/nn/modules/dropout.h>
#include <torch/nn/modules/embedding.h>
#include <torch/nn/modules/functional.h>
#include <torch/nn/modules/linear.h>
#include <torch/types.h>
#include <torch/utils.h>
#include <test/cpp/api/support.h>
using namespace torch::nn;
using namespace torch::test;
// Small three-layer MLP used as a submodule in tests below. All three Linear
// layers are registered so they show up in parameters()/named_parameters().
class TestModel : public torch::nn::Module {
public:
TestModel()
: l1(register_module("l1", Linear(10, 3))),
l2(register_module("l2", Linear(3, 5))),
l3(register_module("l3", Linear(5, 100))) {}
// Publicly accessible layers; init order must match declaration order.
Linear l1, l2, l3;
};
// Module with a raw parameter, a Linear layer, and a nested TestModel.
// Used to verify hierarchical key names ("test.l1.weight", ...) in
// named_parameters().
class NestedModel : public torch::nn::Module {
public:
NestedModel()
: param_(register_parameter("param", torch::empty({3, 2, 21}))),
l1(register_module("l1", Linear(5, 20))),
// Note: registered under the name "test", not "t".
t(register_module("test", std::make_shared<TestModel>())) {}
torch::Tensor param_;
Linear l1;
std::shared_ptr<TestModel> t;
};
// Test fixture that seeds the RNG before each test for reproducibility.
struct ModulesTest : torch::test::SeedingFixture {};
TEST_F(ModulesTest, Conv1d) {
  // 1-D convolution: 3 in-channels -> 2 out-channels, kernel size 3, stride 2.
  Conv1d model(Conv1dOptions(3, 2, 3).stride(2));
  auto input = torch::randn({2, 3, 5}, torch::requires_grad());
  auto output = model->forward(input);
  torch::Tensor loss = output.sum();
  loss.backward();

  // Output is (batch=2, channels=2, length=2); the summed loss is a scalar.
  ASSERT_EQ(output.ndimension(), 3);
  ASSERT_EQ(loss.ndimension(), 0);
  for (auto dim = 0; dim != 3; ++dim) {
    ASSERT_EQ(output.size(dim), 2);
  }
  // One gradient entry per weight element: in * out * kernel.
  ASSERT_EQ(model->weight.grad().numel(), 3 * 2 * 3);
}
TEST_F(ModulesTest, Conv2dEven) {
  // 2-D convolution with a square (even) kernel: 3x3, stride 2.
  Conv2d model(Conv2dOptions(3, 2, 3).stride(2));
  auto input = torch::randn({2, 3, 5, 5}, torch::requires_grad());
  auto output = model->forward(input);
  torch::Tensor loss = output.sum();
  loss.backward();

  // Output is (batch=2, channels=2, h=2, w=2); the summed loss is a scalar.
  ASSERT_EQ(output.ndimension(), 4);
  ASSERT_EQ(loss.ndimension(), 0);
  for (auto dim = 0; dim != 4; ++dim) {
    ASSERT_EQ(output.size(dim), 2);
  }
  // One gradient entry per weight element: in * out * kh * kw.
  ASSERT_EQ(model->weight.grad().numel(), 3 * 2 * 3 * 3);
}
TEST_F(ModulesTest, Conv2dUneven) {
  // 2-D convolution with a non-square kernel (3x2) and per-dim stride.
  Conv2d model(Conv2dOptions(3, 2, {3, 2}).stride({2, 2}));
  auto input = torch::randn({2, 3, 5, 4}, torch::requires_grad());
  auto output = model->forward(input);
  torch::Tensor loss = output.sum();
  loss.backward();

  // Output is (batch=2, channels=2, h=2, w=2); the summed loss is a scalar.
  ASSERT_EQ(output.ndimension(), 4);
  ASSERT_EQ(loss.ndimension(), 0);
  for (auto dim = 0; dim != 4; ++dim) {
    ASSERT_EQ(output.size(dim), 2);
  }
  // One gradient entry per weight element: in * out * kh * kw.
  ASSERT_EQ(model->weight.grad().numel(), 3 * 2 * 3 * 2);
}
TEST_F(ModulesTest, Conv3d) {
  // 3-D convolution: 3 in-channels -> 2 out-channels, cubic kernel 3, stride 2.
  Conv3d model(Conv3dOptions(3, 2, 3).stride(2));
  auto x = torch::randn({2, 3, 5, 5, 5}, torch::requires_grad());
  auto y = model->forward(x);
  torch::Tensor s = y.sum();
  s.backward();

  // Output is (batch=2, channels=2, d=2, h=2, w=2); summed loss is a scalar.
  ASSERT_EQ(y.ndimension(), 5);
  ASSERT_EQ(s.ndimension(), 0);
  for (auto i = 0; i < 5; i++) {
    ASSERT_EQ(y.size(i), 2);
  }
  // One gradient entry per weight element: in * out * kd * kh * kw.
  // Use ASSERT_EQ (not ASSERT_TRUE on ==) for consistency with the other
  // tests and so a failure message reports both values.
  ASSERT_EQ(model->weight.grad().numel(), 3 * 2 * 3 * 3 * 3);
}
TEST_F(ModulesTest, Linear) {
  // Linear layer mapping 5 features to 2.
  Linear model(5, 2);
  auto input = torch::randn({10, 5}, torch::requires_grad());
  auto output = model->forward(input);
  torch::Tensor loss = output.sum();
  loss.backward();

  // Output is (batch=10, features=2); the summed loss is a scalar.
  ASSERT_EQ(output.ndimension(), 2);
  ASSERT_EQ(loss.ndimension(), 0);
  ASSERT_EQ(output.size(0), 10);
  ASSERT_EQ(output.size(1), 2);
  // Weight is (out=2, in=5), so its gradient has 2 * 5 entries.
  ASSERT_EQ(model->weight.grad().numel(), 2 * 5);
}
TEST_F(ModulesTest, SimpleContainer) {
  // Build a small ReLU MLP by adding layers to a SimpleContainer.
  auto container = std::make_shared<SimpleContainer>();
  auto first = container->add(Linear(10, 3), "l1");
  auto second = container->add(Linear(3, 5), "l2");
  auto third = container->add(Linear(5, 100), "l3");

  auto tensor = torch::randn({1000, 10}, torch::requires_grad());
  // clamp_min(0) acts as a ReLU after each layer.
  tensor = first->forward(tensor).clamp_min(0);
  tensor = second->forward(tensor).clamp_min(0);
  tensor = third->forward(tensor).clamp_min(0);
  tensor.backward();

  ASSERT_EQ(tensor.ndimension(), 2);
  ASSERT_EQ(tensor.size(0), 1000);
  ASSERT_EQ(tensor.size(1), 100);
  // After the final clamp, nothing is below zero.
  ASSERT_EQ(tensor.min().item<float>(), 0);
}
TEST_F(ModulesTest, EmbeddingBasic) {
  // Embedding table with `dict_size` rows of dimension 2.
  constexpr int64_t dict_size = 10;
  Embedding model(dict_size, 2);
  ASSERT_TRUE(model->named_parameters().contains("weight"));
  ASSERT_EQ(model->weight.ndimension(), 2);
  ASSERT_EQ(model->weight.size(0), dict_size);
  ASSERT_EQ(model->weight.size(1), 2);

  // Cannot get gradients to change indices (input) - only for embedding
  // params
  auto indices = torch::full({10}, dict_size - 1, torch::kInt64);
  auto output = model->forward(indices);
  torch::Tensor loss = output.sum();
  loss.backward();

  // Each of the 10 indices yields a 2-dim embedding vector.
  ASSERT_EQ(output.ndimension(), 2);
  ASSERT_EQ(loss.ndimension(), 0);
  ASSERT_EQ(output.size(0), 10);
  ASSERT_EQ(output.size(1), 2);
  // The whole weight table receives a gradient entry per element.
  ASSERT_EQ(model->weight.grad().numel(), 2 * dict_size);
}
TEST_F(ModulesTest, EmbeddingList) {
  // A 2-D index tensor should produce a 3-D output: (2, 3) -> (2, 3, 4).
  Embedding model(6, 4);
  auto indices = torch::full({2, 3}, 5, torch::kInt64);
  auto output = model->forward(indices);
  torch::Tensor loss = output.sum();
  loss.backward();

  ASSERT_EQ(output.ndimension(), 3);
  ASSERT_EQ(output.size(0), 2);
  ASSERT_EQ(output.size(1), 3);
  ASSERT_EQ(output.size(2), 4);
}
TEST_F(ModulesTest, Dropout) {
  // Dropout with p = 0.5: in training mode roughly half of the (scaled)
  // inputs survive; in eval mode it is the identity.
  Dropout dropout(0.5);
  torch::Tensor input = torch::ones(100, torch::requires_grad());
  torch::Tensor output = dropout->forward(input);
  output.backward();

  ASSERT_EQ(output.ndimension(), 1);
  ASSERT_EQ(output.size(0), 100);
  // Statistical bounds, not exact — hence "Probably".
  ASSERT_LT(output.sum().item<float>(), 130); // Probably
  ASSERT_GT(output.sum().item<float>(), 70); // Probably

  dropout->eval();
  output = dropout->forward(input);
  // Eval mode: all 100 ones pass through unchanged.
  ASSERT_EQ(output.sum().item<float>(), 100);
}
TEST_F(ModulesTest, Parameters) {
  // named_parameters() must expose the full hierarchy with dotted keys,
  // using the registration names ("param", "l1", "test", ...).
  auto model = std::make_shared<NestedModel>();
  auto params = model->named_parameters();

  // Raw parameter registered directly on NestedModel.
  ASSERT_EQ(params["param"].size(0), 3);
  ASSERT_EQ(params["param"].size(1), 2);
  ASSERT_EQ(params["param"].size(2), 21);

  // NestedModel's own Linear(5, 20).
  ASSERT_EQ(params["l1.bias"].size(0), 20);
  ASSERT_EQ(params["l1.weight"].size(0), 20);
  ASSERT_EQ(params["l1.weight"].size(1), 5);

  // The nested TestModel, registered under "test": Linear(10, 3),
  // Linear(3, 5) and Linear(5, 100).
  ASSERT_EQ(params["test.l1.bias"].size(0), 3);
  ASSERT_EQ(params["test.l1.weight"].size(0), 3);
  ASSERT_EQ(params["test.l1.weight"].size(1), 10);
  ASSERT_EQ(params["test.l2.bias"].size(0), 5);
  ASSERT_EQ(params["test.l2.weight"].size(0), 5);
  ASSERT_EQ(params["test.l2.weight"].size(1), 3);
  ASSERT_EQ(params["test.l3.bias"].size(0), 100);
  ASSERT_EQ(params["test.l3.weight"].size(0), 100);
  ASSERT_EQ(params["test.l3.weight"].size(1), 5);
}
TEST_F(ModulesTest, FunctionalCallsSuppliedFunction) {
  // Both forward() and the call operator must invoke the wrapped lambda.
  bool invoked = false;
  auto functional = Functional([&invoked](torch::Tensor input) {
    invoked = true;
    return input;
  });

  auto output = functional->forward(torch::ones(5, torch::requires_grad()));
  ASSERT_TRUE(invoked);
  ASSERT_TRUE(output.equal(torch::ones(5, torch::requires_grad())));

  invoked = false;
  // Use the call operator overload here.
  output = functional(torch::ones(5, torch::requires_grad()));
  ASSERT_TRUE(invoked);
  ASSERT_TRUE(output.equal(torch::ones(5, torch::requires_grad())));
}
TEST_F(ModulesTest, FunctionalWithTorchFunction) {
  // Functional can wrap a plain torch function pointer directly.
  auto relu_module = Functional(torch::relu);
  // relu(1) == 1 (called twice to verify statelessness), relu(-1) == 0.
  ASSERT_EQ(relu_module(torch::ones({})).item<float>(), 1);
  ASSERT_EQ(relu_module(torch::ones({})).item<float>(), 1);
  ASSERT_EQ(relu_module(torch::ones({}) * -1).item<float>(), 0);
}
TEST_F(ModulesTest, FunctionalArgumentBinding) {
  // Extra constructor arguments are bound as trailing arguments of the
  // wrapped function. With scale = 0, elu(1) collapses to 0.
  auto elu_module =
      Functional(torch::elu, /*alpha=*/1, /*scale=*/0, /*input_scale=*/1);
  ASSERT_EQ(elu_module(torch::ones({})).item<float>(), 0);
}
TEST_F(ModulesTest, BatchNormStateful) {
  BatchNorm batchnorm(5);

  // Is stateful by default.
  ASSERT_TRUE(batchnorm->options.stateful());
  // Stateful => running statistics buffers exist, one entry per feature.
  ASSERT_TRUE(batchnorm->running_mean.defined());
  ASSERT_EQ(batchnorm->running_mean.dim(), 1);
  ASSERT_EQ(batchnorm->running_mean.size(0), 5);
  ASSERT_TRUE(batchnorm->running_variance.defined());
  ASSERT_EQ(batchnorm->running_variance.dim(), 1);
  ASSERT_EQ(batchnorm->running_variance.size(0), 5);

  // Is affine by default.
  ASSERT_TRUE(batchnorm->options.affine());
  // Affine => learnable per-feature weight and bias.
  ASSERT_TRUE(batchnorm->weight.defined());
  ASSERT_EQ(batchnorm->weight.dim(), 1);
  ASSERT_EQ(batchnorm->weight.size(0), 5);
  ASSERT_TRUE(batchnorm->bias.defined());
  ASSERT_EQ(batchnorm->bias.dim(), 1);
  ASSERT_EQ(batchnorm->bias.size(0), 5);
}
TEST_F(ModulesTest, BatchNormStateless) {
  // With stateful/affine disabled, neither running statistics nor the
  // affine parameters should be allocated.
  BatchNorm batchnorm(BatchNormOptions(5).stateful(false).affine(false));
  ASSERT_FALSE(batchnorm->running_mean.defined());
  ASSERT_FALSE(batchnorm->running_variance.defined());
  ASSERT_FALSE(batchnorm->weight.defined());
  ASSERT_FALSE(batchnorm->bias.defined());

  // forward() requires running statistics; stateless modules must use
  // pure_forward() and supply them explicitly.
  ASSERT_THROWS_WITH(
      batchnorm->forward(torch::ones({2, 5})),
      "Calling BatchNorm::forward is only permitted "
      "when the 'stateful' option is true (was false). "
      "Use BatchNorm::pure_forward instead.");
}
TEST_F(ModulesTest, BatchNormPureForward) {
  BatchNorm batchnorm(BatchNormOptions(5).affine(false));
  batchnorm->eval();

  // Want to make sure we use the supplied values in `pure_forward` even if
  // we are stateful.
  auto input = torch::randn({2, 5});
  auto mean = torch::randn(5);
  auto variance = torch::rand(5);
  auto output = batchnorm->pure_forward(input, mean, variance);

  // No affine transform, so the output is plain normalization with the
  // supplied statistics (eps added for numerical stability).
  auto expected =
      (input - mean) / torch::sqrt(variance + batchnorm->options.eps());
  ASSERT_TRUE(output.allclose(expected));
}
TEST_F(ModulesTest, Linear_CUDA) {
  // Same as the Linear test, but with both the module and the input on GPU.
  Linear model(5, 2);
  model->to(torch::kCUDA);
  auto input =
      torch::randn({10, 5}, torch::device(torch::kCUDA).requires_grad(true));
  auto output = model->forward(input);
  torch::Tensor loss = output.sum();
  loss.backward();

  ASSERT_EQ(output.ndimension(), 2);
  ASSERT_EQ(loss.ndimension(), 0);
  ASSERT_EQ(output.size(0), 10);
  ASSERT_EQ(output.size(1), 2);
  // Weight is (out=2, in=5), so its gradient has 2 * 5 entries.
  ASSERT_EQ(model->weight.grad().numel(), 2 * 5);
}
TEST_F(ModulesTest, Linear2_CUDA) {
  // Round-trip the module CPU -> CUDA -> CPU, then run a CPU forward/backward
  // to verify the move back restored a fully functional module.
  Linear model(5, 2);
  model->to(torch::kCUDA);
  model->to(torch::kCPU);

  auto input = torch::randn({10, 5}, torch::requires_grad());
  auto output = model->forward(input);
  torch::Tensor loss = output.sum();
  loss.backward();

  ASSERT_EQ(output.ndimension(), 2);
  ASSERT_EQ(loss.ndimension(), 0);
  ASSERT_EQ(output.size(0), 10);
  ASSERT_EQ(output.size(1), 2);
  // Weight is (out=2, in=5), so its gradient has 2 * 5 entries.
  ASSERT_EQ(model->weight.grad().numel(), 2 * 5);
}