#include &lt;gtest/gtest.h&gt;

#include &lt;torch/torch.h&gt;

#include &lt;test/cpp/api/support.h&gt;

using namespace torch::nn;
using namespace torch::test;

template &lt;typename Func&gt;
bool test_RNN_xor(Func&& model_maker, bool cuda = false) {
  torch::manual_seed(0);

  auto nhid = 32;
  auto model = std::make_shared&lt;SimpleContainer&gt;();
  auto l1 = model-&gt;add(Linear(1, nhid), "l1");
  auto rnn = model-&gt;add(model_maker(nhid), "rnn");
  auto lo = model-&gt;add(Linear(nhid, 1), "lo");

  torch::optim::Adam optimizer(model-&gt;parameters(), 1e-2);
  auto forward_op = [&](torch::Tensor x) {
    auto T = x.size(0);
    auto B = x.size(1);
    x = x.view({T * B, 1});
    x = l1-&gt;forward(x).view({T, B, nhid}).tanh_();
    x = rnn-&gt;forward(x).output[T - 1];
    x = lo-&gt;forward(x);
    return x;
  };

  if (cuda) {
    model-&gt;to(torch::kCUDA);
  }

  float running_loss = 1;
  int epoch = 0;
  auto max_epoch = 1500;
  while (running_loss &gt; 1e-2) {
    auto bs = 16U;
    auto nlen = 5U;

    const auto backend = cuda ? torch::kCUDA : torch::kCPU;
    auto inputs =
        torch::rand({nlen, bs, 1}, backend).round().to(torch::kFloat32);
    auto labels = inputs.sum(0).detach();
    inputs.set_requires_grad(true);
    auto outputs = forward_op(inputs);
    torch::Tensor loss = torch::mse_loss(outputs, labels);

    optimizer.zero_grad();
    loss.backward();
    optimizer.step();

    running_loss = running_loss * 0.99 + loss.item&lt;float&gt;() * 0.01;
    if (epoch &gt; max_epoch) {
      return false;
    }
    epoch++;
  }
  return true;
}
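// Note: despite the "xor" in its name, the helper above trains the network to
// predict the sum of a random 0/1 sequence (labels = inputs.sum(0)), and it
// declares success once the exponentially smoothed MSE drops below 1e-2
// within max_epoch iterations.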
void check_lstm_sizes(RNNOutput output) {
  // Expect the LSTM to have 64 outputs and 3 layers, with an input of batch
  // 10 and 16 time steps (10 x 16 x n)

  ASSERT_EQ(output.output.ndimension(), 3);
  ASSERT_EQ(output.output.size(0), 10);
  ASSERT_EQ(output.output.size(1), 16);
  ASSERT_EQ(output.output.size(2), 64);

  ASSERT_EQ(output.state.ndimension(), 4);
  ASSERT_EQ(output.state.size(0), 2); // (hx, cx)
  ASSERT_EQ(output.state.size(1), 3); // layers
  ASSERT_EQ(output.state.size(2), 16); // batch size
  ASSERT_EQ(output.state.size(3), 64); // 64 hidden dims

  // Something is in the hiddens
  ASSERT_GT(output.state.norm().item&lt;float&gt;(), 0);
}

struct RNNTest : torch::test::SeedingFixture {};

TEST_F(RNNTest, CheckOutputSizes) {
  LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
  // Input size is: sequence length, batch size, input size
  auto x = torch::randn({10, 16, 128}, torch::requires_grad());
  auto output = model-&gt;forward(x);
  auto y = x.mean();

  y.backward();
  check_lstm_sizes(output);

  auto next = model-&gt;forward(x, output.state);
  check_lstm_sizes(next);

  torch::Tensor diff = next.state - output.state;

  // Hiddens changed
  ASSERT_GT(diff.abs().sum().item&lt;float&gt;(), 1e-3);
}

TEST_F(RNNTest, CheckOutputValuesMatchPyTorch) {
  torch::manual_seed(0);
  // Make sure the outputs match PyTorch's Python-API outputs.
  LSTM model(2, 2);
  for (auto& v : model-&gt;parameters()) {
    float size = v.numel();
    auto p = static_cast&lt;float*&gt;(v.storage().data());
    for (size_t i = 0; i &lt; size; i++) {
      p[i] = i / size;
    }
  }

  auto x = torch::empty({3, 4, 2}, torch::requires_grad());
  float size = x.numel();
  auto p = static_cast&lt;float*&gt;(x.storage().data());
  for (size_t i = 0; i &lt; size; i++) {
    p[i] = (size - i) / size;
  }

  auto out = model-&gt;forward(x);
  ASSERT_EQ(out.output.ndimension(), 3);
  ASSERT_EQ(out.output.size(0), 3);
  ASSERT_EQ(out.output.size(1), 4);
  ASSERT_EQ(out.output.size(2), 2);

  auto flat = out.output.view(3 * 4 * 2);
  float c_out[] = {0.4391, 0.5402, 0.4330, 0.5324, 0.4261, 0.5239,
                   0.4183, 0.5147, 0.6822, 0.8064, 0.6726, 0.7968,
                   0.6620, 0.7860, 0.6501, 0.7741, 0.7889, 0.9003,
                   0.7769, 0.8905, 0.7635, 0.8794, 0.7484, 0.8666};
  for (size_t i = 0; i &lt; 3 * 4 * 2; i++) {
    ASSERT_LT(std::abs(flat[i].item&lt;float&gt;() - c_out[i]), 1e-3);
  }

  ASSERT_EQ(out.state.ndimension(), 4); // (hx, cx) x layers x B x 2
  ASSERT_EQ(out.state.size(0), 2);
  ASSERT_EQ(out.state.size(1), 1);
  ASSERT_EQ(out.state.size(2), 4);
  ASSERT_EQ(out.state.size(3), 2);

  flat = out.state.view(16);
  float h_out[] = {0.7889, 0.9003, 0.7769, 0.8905, 0.7635, 0.8794,
                   0.7484, 0.8666, 1.1647, 1.6106, 1.1425, 1.5726,
                   1.1187, 1.5329, 1.0931, 1.4911};
  for (size_t i = 0; i &lt; 16; i++) {
    ASSERT_LT(std::abs(flat[i].item&lt;float&gt;() - h_out[i]), 1e-3);
  }
}

TEST_F(RNNTest, EndToEndLSTM) {
  ASSERT_TRUE(
      test_RNN_xor([](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }));
}

TEST_F(RNNTest, EndToEndGRU) {
  ASSERT_TRUE(
      test_RNN_xor([](int s) { return GRU(GRUOptions(s, s).layers(2)); }));
}

TEST_F(RNNTest, EndToEndRNNRelu) {
  ASSERT_TRUE(test_RNN_xor(
      [](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }));
}

TEST_F(RNNTest, EndToEndRNNTanh) {
  ASSERT_TRUE(test_RNN_xor(
      [](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }));
}

TEST_F(RNNTest, Sizes_CUDA) {
  torch::manual_seed(0);
  LSTM model(LSTMOptions(128, 64).layers(3).dropout(0.2));
  model-&gt;to(torch::kCUDA);
  auto x =
      torch::randn({10, 16, 128}, torch::requires_grad().device(torch::kCUDA));
  auto output = model-&gt;forward(x);
  auto y = x.mean();

  y.backward();
  check_lstm_sizes(output);

  auto next = model-&gt;forward(x, output.state);
  check_lstm_sizes(next);

  torch::Tensor diff = next.state - output.state;

  // Hiddens changed
  ASSERT_GT(diff.abs().sum().item&lt;float&gt;(), 1e-3);
}

TEST_F(RNNTest, EndToEndLSTM_CUDA) {
  ASSERT_TRUE(test_RNN_xor(
      [](int s) { return LSTM(LSTMOptions(s, s).layers(2)); }, true));
}

TEST_F(RNNTest, EndToEndGRU_CUDA) {
  ASSERT_TRUE(test_RNN_xor(
      [](int s) { return GRU(GRUOptions(s, s).layers(2)); }, true));
}

TEST_F(RNNTest, EndToEndRNNRelu_CUDA) {
  ASSERT_TRUE(test_RNN_xor(
      [](int s) { return RNN(RNNOptions(s, s).relu().layers(2)); }, true));
}

TEST_F(RNNTest, EndToEndRNNTanh_CUDA) {
  ASSERT_TRUE(test_RNN_xor(
      [](int s) { return RNN(RNNOptions(s, s).tanh().layers(2)); }, true));
}

TEST_F(RNNTest, PrettyPrintRNNs) {
  ASSERT_EQ(
      c10::str(LSTM(LSTMOptions(128, 64).layers(3).dropout(0.2))),
      "torch::nn::LSTM(input_size=128, hidden_size=64, layers=3, dropout=0.2)");
  ASSERT_EQ(
      c10::str(GRU(GRUOptions(128, 64).layers(3).dropout(0.5))),
      "torch::nn::GRU(input_size=128, hidden_size=64, layers=3, dropout=0.5)");
  ASSERT_EQ(
      c10::str(RNN(RNNOptions(128, 64).layers(3).dropout(0.2).tanh())),
      "torch::nn::RNN(input_size=128, hidden_size=64, layers=3, dropout=0.2, activation=tanh)");
}

// This test assures that flatten_parameters does not crash
// when bidirectional is set to true.
// https://github.com/pytorch/pytorch/issues/19545
TEST_F(RNNTest, BidirectionalFlattenParameters) {
  GRU gru(GRUOptions(100, 256).layers(2).bidirectional(true));
  gru-&gt;flatten_parameters();
}

template &lt;typename Impl&gt;
void copyParameters(
    torch::nn::ModuleHolder&lt;Impl&gt;& target,
    size_t t_i,
    const torch::nn::ModuleHolder&lt;Impl&gt;& source,
    size_t s_i) {
  at::NoGradGuard guard;
  target-&gt;w_ih[t_i].copy_(source-&gt;w_ih[s_i]);
  target-&gt;w_hh[t_i].copy_(source-&gt;w_hh[s_i]);
  target-&gt;b_ih[t_i].copy_(source-&gt;b_ih[s_i]);
  target-&gt;b_hh[t_i].copy_(source-&gt;b_hh[s_i]);
}
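// copyParameters assumes the wrapped RNN implementation exposes its per-slot
// weights and biases as the w_ih/w_hh/b_ih/b_hh tensor lists, indexed by
// (layer, direction) slot. The tests below use it to make one direction of a
// bidirectional module share weights with a unidirectional reference module.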
// This test is a port of Python code introduced here:
// https://towardsdatascience.com/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66
// The reverse forward of a bidirectional GRU should act
// like the regular forward of a unidirectional GRU.
void BidirectionalGRUReverseForward(bool cuda) {
  auto opt = torch::TensorOptions()
                 .dtype(torch::kFloat32)
                 .requires_grad(false)
                 .device(cuda ? torch::kCUDA : torch::kCPU);
  auto input = torch::tensor({1, 2, 3, 4, 5}, opt).reshape({5, 1, 1});
  auto input_reversed = torch::tensor({5, 4, 3, 2, 1}, opt).reshape({5, 1, 1});

  auto gru_options = GRUOptions(1, 1).layers(1).batch_first(false);
  GRU bi_grus{gru_options.bidirectional(true)};
  GRU reverse_gru{gru_options.bidirectional(false)};

  if (cuda) {
    bi_grus-&gt;to(torch::kCUDA);
    reverse_gru-&gt;to(torch::kCUDA);
  }

  // Now make sure the weights of the reverse GRU layer match
  // the ones of the (reversed) bidirectional GRU:
  copyParameters(reverse_gru, 0, bi_grus, 1);

  auto bi_output = bi_grus-&gt;forward(input);
  auto reverse_output = reverse_gru-&gt;forward(input_reversed);

  if (cuda) {
    bi_output.output = bi_output.output.to(torch::kCPU);
    bi_output.state = bi_output.state.to(torch::kCPU);
    reverse_output.output = reverse_output.output.to(torch::kCPU);
    reverse_output.state = reverse_output.state.to(torch::kCPU);
  }

  ASSERT_EQ(bi_output.output.size(0), reverse_output.output.size(0));
  auto size = bi_output.output.size(0);
  for (int i = 0; i &lt; size; i++) {
    ASSERT_EQ(
        bi_output.output[i][0][1].item&lt;float&gt;(),
        reverse_output.output[size - 1 - i][0][0].item&lt;float&gt;());
  }
  // The hidden states of the reversed GRU sit
  // at the odd indices in the first dimension.
  ASSERT_EQ(
      bi_output.state[1][0][0].item&lt;float&gt;(),
      reverse_output.state[0][0][0].item&lt;float&gt;());
}

TEST_F(RNNTest, BidirectionalGRUReverseForward) {
  BidirectionalGRUReverseForward(false);
}

TEST_F(RNNTest, BidirectionalGRUReverseForward_CUDA) {
  BidirectionalGRUReverseForward(true);
}
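// For the tests in this family, the state tensor packs one slot per
// (layer, direction) pair along its layer dimension, which is why the reverse
// direction of layer 0 lives at index 1. A minimal sketch of that indexing
// rule (state_slot is a hypothetical helper for illustration only; the
// CPU-vs-CUDA tests below inline the same arithmetic):
inline int64_t state_slot(
    int64_t layer,
    int64_t direction,
    int64_t num_directions) {
  // Layer 0's forward direction is slot 0, its reverse direction slot 1,
  // layer 1's forward direction slot 2, and so on.
  return layer * num_directions + direction;
}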
// The reverse forward of a bidirectional LSTM should act
// like the regular forward of a unidirectional LSTM.
void BidirectionalLSTMReverseForwardTest(bool cuda) {
  auto opt = torch::TensorOptions()
                 .dtype(torch::kFloat32)
                 .requires_grad(false)
                 .device(cuda ? torch::kCUDA : torch::kCPU);
  auto input = torch::tensor({1, 2, 3, 4, 5}, opt).reshape({5, 1, 1});
  auto input_reversed = torch::tensor({5, 4, 3, 2, 1}, opt).reshape({5, 1, 1});

  auto lstm_opt = LSTMOptions(1, 1).layers(1).batch_first(false);
  LSTM bi_lstm{lstm_opt.bidirectional(true)};
  LSTM reverse_lstm{lstm_opt.bidirectional(false)};

  if (cuda) {
    bi_lstm-&gt;to(torch::kCUDA);
    reverse_lstm-&gt;to(torch::kCUDA);
  }

  // Now make sure the weights of the reverse LSTM layer match
  // the ones of the (reversed) bidirectional LSTM:
  copyParameters(reverse_lstm, 0, bi_lstm, 1);

  auto bi_output = bi_lstm-&gt;forward(input);
  auto reverse_output = reverse_lstm-&gt;forward(input_reversed);

  if (cuda) {
    bi_output.output = bi_output.output.to(torch::kCPU);
    bi_output.state = bi_output.state.to(torch::kCPU);
    reverse_output.output = reverse_output.output.to(torch::kCPU);
    reverse_output.state = reverse_output.state.to(torch::kCPU);
  }

  ASSERT_EQ(bi_output.output.size(0), reverse_output.output.size(0));
  auto size = bi_output.output.size(0);
  for (int i = 0; i &lt; size; i++) {
    ASSERT_EQ(
        bi_output.output[i][0][1].item&lt;float&gt;(),
        reverse_output.output[size - 1 - i][0][0].item&lt;float&gt;());
  }
  // The hidden states of the reversed LSTM sit
  // at the odd indices in the first dimension.
  ASSERT_EQ(
      bi_output.state[0][1][0][0].item&lt;float&gt;(),
      reverse_output.state[0][0][0][0].item&lt;float&gt;());
  ASSERT_EQ(
      bi_output.state[1][1][0][0].item&lt;float&gt;(),
      reverse_output.state[1][0][0][0].item&lt;float&gt;());
}

TEST_F(RNNTest, BidirectionalLSTMReverseForward) {
  BidirectionalLSTMReverseForwardTest(false);
}

TEST_F(RNNTest, BidirectionalLSTMReverseForward_CUDA) {
  BidirectionalLSTMReverseForwardTest(true);
}

TEST_F(RNNTest, BidirectionalMultilayerGRU_CPU_vs_CUDA) {
  // Create two GRUs with the same options.
  auto opt = GRUOptions(2, 4).layers(3).batch_first(false).bidirectional(true);
  GRU gru_cpu{opt};
  GRU gru_cuda{opt};

  // Copy weights and biases from the CPU GRU to the CUDA GRU.
  {
    at::NoGradGuard guard;
    const auto num_directions = gru_cpu-&gt;options.bidirectional() ? 2 : 1;
    for (int64_t layer = 0; layer &lt; gru_cpu-&gt;options.layers(); layer++) {
      for (auto direction = 0; direction &lt; num_directions; direction++) {
        const auto layer_idx = (layer * num_directions) + direction;
        copyParameters(gru_cuda, layer_idx, gru_cpu, layer_idx);
      }
    }
  }

  gru_cpu-&gt;flatten_parameters();
  gru_cuda-&gt;flatten_parameters();

  // Move the second GRU to CUDA.
  gru_cuda-&gt;to(torch::kCUDA);

  // Create identical inputs on both devices.
  auto input_opt =
      torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
  auto input_cpu =
      torch::tensor({1, 2, 3, 4, 5, 6}, input_opt).reshape({3, 1, 2});
  auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, input_opt)
                        .reshape({3, 1, 2})
                        .to(torch::kCUDA);

  // Call forward on both GRUs.
  auto output_cpu = gru_cpu-&gt;forward(input_cpu);
  auto output_cuda = gru_cuda-&gt;forward(input_cuda);

  // Bring the CUDA results back to the CPU for comparison.
  output_cuda.output = output_cuda.output.to(torch::kCPU);
  output_cuda.state = output_cuda.state.to(torch::kCPU);

  // Assert that the outputs are equal on CPU and CUDA.
  ASSERT_EQ(output_cpu.output.dim(), output_cuda.output.dim());
  for (int i = 0; i &lt; output_cpu.output.dim(); i++) {
    ASSERT_EQ(output_cpu.output.size(i), output_cuda.output.size(i));
  }
  for (int i = 0; i &lt; output_cpu.output.size(0); i++) {
    for (int j = 0; j &lt; output_cpu.output.size(1); j++) {
      for (int k = 0; k &lt; output_cpu.output.size(2); k++) {
        ASSERT_NEAR(
            output_cpu.output[i][j][k].item&lt;float&gt;(),
            output_cuda.output[i][j][k].item&lt;float&gt;(),
            1e-5);
      }
    }
  }
}
TEST_F(RNNTest, BidirectionalMultilayerLSTM_CPU_vs_CUDA) {
  // Create two LSTMs with the same options.
  auto opt =
      LSTMOptions(2, 4).layers(3).batch_first(false).bidirectional(true);
  LSTM lstm_cpu{opt};
  LSTM lstm_cuda{opt};

  // Copy weights and biases from the CPU LSTM to the CUDA LSTM.
  {
    at::NoGradGuard guard;
    const auto num_directions = lstm_cpu-&gt;options.bidirectional() ? 2 : 1;
    for (int64_t layer = 0; layer &lt; lstm_cpu-&gt;options.layers(); layer++) {
      for (auto direction = 0; direction &lt; num_directions; direction++) {
        const auto layer_idx = (layer * num_directions) + direction;
        copyParameters(lstm_cuda, layer_idx, lstm_cpu, layer_idx);
      }
    }
  }

  lstm_cpu-&gt;flatten_parameters();
  lstm_cuda-&gt;flatten_parameters();

  // Move the second LSTM to CUDA.
  lstm_cuda-&gt;to(torch::kCUDA);

  auto options =
      torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
  auto input_cpu =
      torch::tensor({1, 2, 3, 4, 5, 6}, options).reshape({3, 1, 2});
  auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, options)
                        .reshape({3, 1, 2})
                        .to(torch::kCUDA);

  // Call forward on both LSTMs.
  auto output_cpu = lstm_cpu-&gt;forward(input_cpu);
  auto output_cuda = lstm_cuda-&gt;forward(input_cuda);

  // Bring the CUDA results back to the CPU for comparison.
  output_cuda.output = output_cuda.output.to(torch::kCPU);
  output_cuda.state = output_cuda.state.to(torch::kCPU);

  // Assert that the outputs are equal on CPU and CUDA.
  ASSERT_EQ(output_cpu.output.dim(), output_cuda.output.dim());
  for (int i = 0; i &lt; output_cpu.output.dim(); i++) {
    ASSERT_EQ(output_cpu.output.size(i), output_cuda.output.size(i));
  }
  for (int i = 0; i &lt; output_cpu.output.size(0); i++) {
    for (int j = 0; j &lt; output_cpu.output.size(1); j++) {
      for (int k = 0; k &lt; output_cpu.output.size(2); k++) {
        ASSERT_NEAR(
            output_cpu.output[i][j][k].item&lt;float&gt;(),
            output_cuda.output[i][j][k].item&lt;float&gt;(),
            1e-5);
      }
    }
  }
}
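// The two CPU-vs-CUDA tests above repeat the same elementwise comparison
// loop. A minimal sketch of how it could be factored out (assert_outputs_near
// is a hypothetical helper, not part of the test suite above; torch::allclose
// performs the same per-element tolerance check in a single call):
void assert_outputs_near(
    const torch::Tensor& cpu,
    const torch::Tensor& cuda,
    double tolerance = 1e-5) {
  ASSERT_EQ(cpu.sizes(), cuda.sizes());
  // Compare with an absolute tolerance only, matching the ASSERT_NEAR loops.
  ASSERT_TRUE(torch::allclose(
      cpu, cuda.to(torch::kCPU), /*rtol=*/0.0, /*atol=*/tolerance));
}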