Summary:
```
Use intrusive_ptr in Storage; replace unique_ptr<Storage> with Storage
This patch does two major changes:
- It replaces the use of Retainable in Storage with a new implementation
based on intrusive_ptr. This will be necessary because Caffe2 will
be using this class to implement intrusive_ptrs, and we need to
line these up for the merge. One good thing about the new implementation is
that the default copy/move constructors/assignment operators and destructor
work automatically, instead of needing to be hardcoded into Storage/Tensor.
- It replaces all places where we returned std::unique_ptr<Storage> with
Storage, collapsing a double indirection that is no longer necessary
now that we have correctly working copy/move constructors.
I didn't initially want to do step (2), but it was very important to
eliminate all bare uses of new Storage and new StorageImpl, and making
this API change was the most straightforward way to do so.
HOW TO FIX YOUR CODE IN THE NEW API
- You no longer need to dereference the result of tensor.storage() to pass
it to set. So, instead of:
x.set_(*y.storage());
just write:
x.set_(y.storage());
- If you were accessing methods on StorageImpl via the pImpl() method, you
must now use the dot operator to call pImpl(). Even better: just drop pImpl();
we now have method forwarding. So, instead of:
storage->pImpl()->data();
just do:
storage->data();
// storage.pImpl()->data() works too, but is discouraged
- storage->getDevice() is no more; instead use storage->device().index()
(a consolidated sketch follows)
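For example (x and y are hypothetical tensors of matching type; the exact
call syntax may differ slightly in your code base):
// Before:
x.set_(*y.storage());
void* ptr = y.storage()->pImpl()->data();
int device = y.storage()->getDevice();
// After:
x.set_(y.storage());
void* ptr = y.storage().data();            // method forwarding, no pImpl()
int device = y.storage().device().index(); // replaces getDevice()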
MISC CODE UPDATES
- retain, release, weak_retain, weak_release and weak_lock are now
reimplemented using the "blessed API", and renamed to make it
clearer that their use is discouraged.
- nvcc OS X and general OS X portability improvements to intrusive_ptr
- A new comment in intrusive_ptr describing how stack-allocated
intrusive_ptr_targets work differently from heap-allocated ones
created by c10::make_intrusive (sketched below)
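For instance (MyTarget is a hypothetical subclass of c10::intrusive_ptr_target):
auto p = c10::make_intrusive<MyTarget>(); // heap-allocated and refcounted;
                                          // copies of p share ownership
MyTarget t; // stack-allocated: usable directly, but must never be wrapped in
            // an intrusive_ptr, which would try to delete it at refcount zero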
CAVEAT EMPTOR
- THStorage_weakRetain used to work on strong pointers, but it NO LONGER
works with intrusive_ptr. You must reclaim the strong pointer into a
real strong pointer, construct a weak pointer from it, and then release
the strong and weak pointers. See StorageSharing.cpp for an example
(a rough sketch follows).
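Roughly, under the new API (type and function names are approximate; the
real code is in StorageSharing.cpp):
auto strong = c10::intrusive_ptr<StorageImpl>::reclaim(raw_storage);
auto weak = c10::weak_intrusive_ptr<StorageImpl>(strong);
strong.release(); // hand the strong reference back without decrementing it
weak.release();   // keep the weak count alive behind a raw handle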
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10488
Reviewed By: gchanan
Differential Revision: D9306134
Pulled By: ezyang
fbshipit-source-id: 02d58ef62dab8e4da6131e1a24834a65c21048e2
234 lines
6.4 KiB
C++
```
#include <catch.hpp>

#include <torch/nn/modules/linear.h>
#include <torch/nn/modules/rnn.h>
#include <torch/optim/adam.h>
#include <torch/tensor.h>
#include <torch/utils.h>

#include <test/cpp/api/util.h>

using namespace torch::nn;
using namespace torch::test;

// Trains the RNN produced by `model_maker` to regress the sum of a random
// binary sequence; returns true if the running loss drops below 1e-2 within
// max_epoch epochs.
template <typename R, typename Func>
bool test_RNN_xor(Func&& model_maker, bool cuda = false) {
  torch::manual_seed(0);

  auto nhid = 32;
  auto model = std::make_shared<SimpleContainer>();
  auto l1 = model->add(Linear(1, nhid), "l1");
  auto rnn = model->add(model_maker(nhid), "rnn");
  auto lo = model->add(Linear(nhid, 1), "lo");

  torch::optim::Adam optimizer(model->parameters(), 1e-2);
  auto forward_op = [&](torch::Tensor x) {
    auto T = x.size(0);
    auto B = x.size(1);
    x = x.view({T * B, 1});
    x = l1->forward(x).view({T, B, nhid}).tanh_();
    x = rnn->forward(x).output[T - 1];
    x = lo->forward(x);
    return x;
  };

  if (cuda) {
    model->to(torch::kCUDA);
  }

  float running_loss = 1;
  int epoch = 0;
  auto max_epoch = 1500;
  while (running_loss > 1e-2) {
    auto bs = 16U;
    auto nlen = 5U;

    const auto backend = cuda ? torch::kCUDA : torch::kCPU;
    auto inputs =
        torch::rand({nlen, bs, 1}, backend).round().toType(torch::kFloat32);
    auto labels = inputs.sum(0).detach();
    inputs.set_requires_grad(true);

    auto outputs = forward_op(inputs);
    torch::Tensor loss = torch::mse_loss(outputs, labels);

    optimizer.zero_grad();
    loss.backward();
    optimizer.step();

    running_loss = running_loss * 0.99 + loss.toCFloat() * 0.01;
    if (epoch > max_epoch) {
      return false;
    }
    epoch++;
  }
  return true;
}

void check_lstm_sizes(RNNOutput output) {
  // Expect the LSTM to have 64 hidden units and 3 layers, with an input of
  // 10 time steps and batch size 16 (10 x 16 x n).

  REQUIRE(output.output.ndimension() == 3);
  REQUIRE(output.output.size(0) == 10);
  REQUIRE(output.output.size(1) == 16);
  REQUIRE(output.output.size(2) == 64);

  REQUIRE(output.state.ndimension() == 4);
  REQUIRE(output.state.size(0) == 2); // (hx, cx)
  REQUIRE(output.state.size(1) == 3); // layers
  REQUIRE(output.state.size(2) == 16); // batch size
  REQUIRE(output.state.size(3) == 64); // 64 hidden dims

  // Something is in the hiddens
  REQUIRE(output.state.norm().toCFloat() > 0);
}

TEST_CASE("rnn") {
|
|
torch::manual_seed(0);
|
|
SECTION("sizes") {
|
|
LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
|
|
auto x = torch::randn({10, 16, 128}, torch::requires_grad());
|
|
auto output = model->forward(x);
|
|
auto y = x.mean();
|
|
|
|
y.backward();
|
|
check_lstm_sizes(output);
|
|
|
|
auto next = model->forward(x, output.state);
|
|
|
|
check_lstm_sizes(next);
|
|
|
|
torch::Tensor diff = next.state - output.state;
|
|
|
|
// Hiddens changed
|
|
REQUIRE(diff.abs().sum().toCFloat() > 1e-3);
|
|
}
|
|
|
|
SECTION("outputs") {
|
|
// Make sure the outputs match pytorch outputs
|
|
LSTM model(2, 2);
|
|
for (auto& v : model->parameters()) {
|
|
float size = v->numel();
|
|
auto p = static_cast<float*>(v->storage().data());
|
|
for (size_t i = 0; i < size; i++) {
|
|
p[i] = i / size;
|
|
}
|
|
}
|
|
|
|
auto x = torch::empty({3, 4, 2}, torch::requires_grad());
|
|
float size = x.numel();
|
|
auto p = static_cast<float*>(x.storage().data());
|
|
for (size_t i = 0; i < size; i++) {
|
|
p[i] = (size - i) / size;
|
|
}
|
|
|
|
auto out = model->forward(x);
|
|
REQUIRE(out.output.ndimension() == 3);
|
|
REQUIRE(out.output.size(0) == 3);
|
|
REQUIRE(out.output.size(1) == 4);
|
|
REQUIRE(out.output.size(2) == 2);
|
|
|
|
auto flat = out.output.view(3 * 4 * 2);
|
|
float c_out[] = {0.4391, 0.5402, 0.4330, 0.5324, 0.4261, 0.5239,
|
|
0.4183, 0.5147, 0.6822, 0.8064, 0.6726, 0.7968,
|
|
0.6620, 0.7860, 0.6501, 0.7741, 0.7889, 0.9003,
|
|
0.7769, 0.8905, 0.7635, 0.8794, 0.7484, 0.8666};
|
|
for (size_t i = 0; i < 3 * 4 * 2; i++) {
|
|
REQUIRE(std::abs(flat[i].toCFloat() - c_out[i]) < 1e-3);
|
|
}
|
|
|
|
REQUIRE(out.state.ndimension() == 4); // (hx, cx) x layers x B x 2
|
|
REQUIRE(out.state.size(0) == 2);
|
|
REQUIRE(out.state.size(1) == 1);
|
|
REQUIRE(out.state.size(2) == 4);
|
|
REQUIRE(out.state.size(3) == 2);
|
|
flat = out.state.view(16);
|
|
float h_out[] = {0.7889,
|
|
0.9003,
|
|
0.7769,
|
|
0.8905,
|
|
0.7635,
|
|
0.8794,
|
|
0.7484,
|
|
0.8666,
|
|
1.1647,
|
|
1.6106,
|
|
1.1425,
|
|
1.5726,
|
|
1.1187,
|
|
1.5329,
|
|
1.0931,
|
|
1.4911};
|
|
for (size_t i = 0; i < 16; i++) {
|
|
REQUIRE(std::abs(flat[i].toCFloat() - h_out[i]) < 1e-3);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_CASE("rnn/integration/LSTM") {
|
|
REQUIRE(test_RNN_xor<LSTM>(
|
|
[](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }));
|
|
}
|
|
|
|
TEST_CASE("rnn/integration/GRU") {
|
|
REQUIRE(
|
|
test_RNN_xor<GRU>([](int s) { return GRU(GRUOptions(s, s).layers(2)); }));
|
|
}
|
|
|
|
TEST_CASE("rnn/integration/RNN") {
|
|
SECTION("relu") {
|
|
REQUIRE(test_RNN_xor<RNN>(
|
|
[](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }));
|
|
}
|
|
SECTION("tanh") {
|
|
REQUIRE(test_RNN_xor<RNN>(
|
|
[](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }));
|
|
}
|
|
}
|
|
|
|
TEST_CASE("rnn_cuda", "[cuda]") {
|
|
SECTION("sizes") {
|
|
torch::manual_seed(0);
|
|
LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
|
|
model->to(torch::kCUDA);
|
|
auto x = torch::randn(
|
|
{10, 16, 128}, torch::requires_grad().device(torch::kCUDA));
|
|
auto output = model->forward(x);
|
|
auto y = x.mean();
|
|
|
|
y.backward();
|
|
check_lstm_sizes(output);
|
|
|
|
auto next = model->forward(x, output.state);
|
|
|
|
check_lstm_sizes(next);
|
|
|
|
torch::Tensor diff = next.state - output.state;
|
|
|
|
// Hiddens changed
|
|
REQUIRE(diff.abs().sum().toCFloat() > 1e-3);
|
|
}
|
|
|
|
SECTION("lstm") {
|
|
REQUIRE(test_RNN_xor<LSTM>(
|
|
[](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }, true));
|
|
}
|
|
|
|
SECTION("gru") {
|
|
REQUIRE(test_RNN_xor<GRU>(
|
|
[](int s) { return GRU(GRUOptions(s, s).layers(2)); }, true));
|
|
}
|
|
|
|
SECTION("rnn") {
|
|
SECTION("relu") {
|
|
REQUIRE(test_RNN_xor<RNN>(
|
|
[](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }, true));
|
|
}
|
|
SECTION("tanh") {
|
|
REQUIRE(test_RNN_xor<RNN>(
|
|
[](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }, true));
|
|
}
|
|
}
|
|
}
|