#include <gtest/gtest.h>

#include <torch/torch.h>

#include <test/cpp/api/support.h>

#include <algorithm>
#include <cmath>
#include <limits>
#include <random>
#include <vector>

using namespace torch::nn;

struct NNUtilsTest : torch::test::SeedingFixture {};

// Zero-pads `tensor` along dim 0 up to `length`, keeping all trailing dims;
// used to build the expected outputs for pad_sequence below.
torch::Tensor PadSequence_pad(const torch::Tensor& tensor, int64_t length) {
  torch::NoGradGuard no_grad;
  std::vector<int64_t> tensor_sizes{length - tensor.size(0)};
  tensor_sizes.insert(
      tensor_sizes.end(),
      tensor.sizes().slice(1).begin(),
      tensor.sizes().slice(1).end());
  return torch::cat({tensor, torch::zeros(tensor_sizes, tensor.options())});
}

TEST_F(NNUtilsTest, PadSequence) {
  // single dimensional
  auto a = torch::tensor({1, 2, 3});
  auto b = torch::tensor({4, 5});
  auto c = torch::tensor({6});

  torch::Tensor expected, padded;

  // batch_first = true
  expected = torch::tensor({{4, 5, 0}, {1, 2, 3}, {6, 0, 0}});
  padded = utils::rnn::pad_sequence({b, a, c}, true);
  ASSERT_TRUE(padded.allclose(expected));

  // batch_first = false
  padded = utils::rnn::pad_sequence({b, a, c});
  ASSERT_TRUE(padded.allclose(expected.transpose(0, 1)));

  // pad with non-zero value
  expected = torch::tensor({{4, 5, 1}, {1, 2, 3}, {6, 1, 1}});
  padded = utils::rnn::pad_sequence({b, a, c}, true, 1);
  ASSERT_TRUE(padded.allclose(expected));

  // Test pad sorted sequence
  expected = torch::tensor({{1, 2, 3}, {4, 5, 0}, {6, 0, 0}});
  padded = utils::rnn::pad_sequence({a, b, c}, true);
  ASSERT_TRUE(padded.allclose(expected));

  // more dimensions
  int64_t maxlen = 9;
  for (int64_t num_dim : std::vector<int64_t>{0, 1, 2, 3}) {
    std::vector<torch::Tensor> sequences;
    std::vector<int64_t> trailing_dims(num_dim, 4);
    for (int64_t i = 1; i < maxlen + 1; i++) {
      int64_t seq_len = i * i;
      std::vector<int64_t> tensor_sizes{seq_len, 5};
      tensor_sizes.insert(
          tensor_sizes.end(), trailing_dims.begin(), trailing_dims.end());
      sequences.emplace_back(torch::rand(tensor_sizes));
    }
    std::shuffle(
        std::begin(sequences),
        std::end(sequences),
        std::default_random_engine{});
    std::vector<torch::Tensor> expected_tensors;
    for (const torch::Tensor& seq : sequences) {
      expected_tensors.emplace_back(PadSequence_pad(seq, maxlen * maxlen));
    }

    // batch first = true
    auto expected = torch::stack(expected_tensors);
    auto padded = utils::rnn::pad_sequence(sequences, true);
    ASSERT_TRUE(padded.allclose(expected));

    // batch first = false
    padded = utils::rnn::pad_sequence(sequences);
    ASSERT_TRUE(padded.allclose(expected.transpose(0, 1)));
  }
}

TEST_F(NNUtilsTest, ClipGradNorm) {
  auto l = Linear(10, 10);
  float max_norm = 2;
  // Reference implementation of the p-norm (or inf-norm) of all gradients.
  auto compute_norm = [&](float norm_type) -> float {
    float total_norm = 0.0;
    if (norm_type != std::numeric_limits<float>::infinity()) {
      for (const auto& p : l->parameters()) {
        total_norm +=
            p.grad().data().abs().pow(norm_type).sum().item().toFloat();
      }
      return std::pow(total_norm, 1.0 / norm_type);
    } else {
      for (const auto& p : l->parameters()) {
        auto param_max = p.grad().data().abs().max().item().toFloat();
        if (param_max > total_norm) {
          total_norm = param_max;
        }
      }
      return total_norm;
    }
  };
  // Ratio of clipped gradients to original gradients; should be uniform.
  auto compare_scaling =
      [&](const std::vector<torch::Tensor>& grads) -> torch::Tensor {
    std::vector<torch::Tensor> p_scale;
    for (int i = 0; i < grads.size(); i++) {
      auto param = l->parameters()[i];
      auto grad = grads[i];
      p_scale.push_back(param.grad().data().div(grad).view(-1));
    }
    auto scale = torch::cat(p_scale);
    return scale; // need to assert std is 0.
  };

  std::vector<torch::Tensor> grads = {
      torch::arange(1.0, 101).view({10, 10}),
      torch::ones({10}).div(1000),
  };
  std::vector<float> norm_types = {
      0.5,
      1.5,
      2.0,
      4.0,
      std::numeric_limits<float>::infinity(),
  };
  for (auto norm_type : norm_types) {
    for (int i = 0; i < grads.size(); i++) {
      l->parameters()[i].grad() =
          grads[i].clone().view_as(l->parameters()[i].data());
    }
    auto norm_before = compute_norm(norm_type);
    auto norm = utils::clip_grad_norm_(l->parameters(), max_norm, norm_type);
    auto norm_after = compute_norm(norm_type);
    ASSERT_FLOAT_EQ(norm, norm_before);
    ASSERT_FLOAT_EQ(norm_after, max_norm);
    ASSERT_LE(norm_after, max_norm);
    auto scaled = compare_scaling(grads);
    ASSERT_NEAR(0, scaled.std().item().toFloat(), 1e-7);
  }

  // Small gradients should be left unchanged
  grads = {
      torch::rand({10, 10}).div(10000),
      torch::ones(10).div(500),
  };
  for (auto norm_type : norm_types) {
    for (int i = 0; i < grads.size(); i++) {
      l->parameters()[i].grad().data().copy_(grads[i]);
    }
    auto norm_before = compute_norm(norm_type);
    auto norm = utils::clip_grad_norm_(l->parameters(), max_norm, norm_type);
    auto norm_after = compute_norm(norm_type);
    ASSERT_FLOAT_EQ(norm, norm_before);
    ASSERT_FLOAT_EQ(norm_before, norm_after);
    ASSERT_LE(norm_after, max_norm);
    auto scaled = compare_scaling(grads);
    ASSERT_NEAR(0, scaled.std().item().toFloat(), 1e-7);
    ASSERT_EQ(scaled[0].item().toFloat(), 1);
  }

  // should accept a single tensor as input
  auto p1 = torch::randn({10, 10});
  auto p2 = torch::randn({10, 10});
  auto g = torch::arange(1., 101).view({10, 10});
  p1.grad() = g.clone();
  p2.grad() = g.clone();
  for (const auto norm_type : norm_types) {
    utils::clip_grad_norm_(p1, max_norm, norm_type);
    utils::clip_grad_norm_({p2}, max_norm, norm_type);
    ASSERT_TRUE(torch::allclose(p1.grad(), p2.grad()));
  }
}

TEST_F(NNUtilsTest, ClipGradValue) {
  auto l = Linear(10, 10);
  float clip_value = 2.5;

  torch::Tensor grad_w = torch::arange(-50., 50).view({10, 10}).div_(5);
  torch::Tensor grad_b = torch::ones({10}).mul_(2);
  std::vector<std::vector<torch::Tensor>> grad_lists = {
      {grad_w, grad_b}, {grad_w, torch::Tensor()}};
  for (auto grad_list : grad_lists) {
    for (int i = 0; i < grad_list.size(); i++) {
      auto p = l->parameters()[i];
      auto g = grad_list[i];
      p.grad() = g.defined() ?
          g.clone().view_as(p.data()) : g;
    }

    utils::clip_grad_value_(l->parameters(), clip_value);
    for (const auto& p : l->parameters()) {
      if (p.grad().defined()) {
        ASSERT_LE(p.grad().data().max().item().toFloat(), clip_value);
        ASSERT_GE(p.grad().data().min().item().toFloat(), -clip_value);
      }
    }
  }

  // Should accept a single Tensor as input
  auto p1 = torch::randn({10, 10});
  auto p2 = torch::randn({10, 10});
  auto g = torch::arange(-50., 50).view({10, 10}).div_(5);
  p1.grad() = g.clone();
  p2.grad() = g.clone();
  utils::clip_grad_value_(p1, clip_value);
  utils::clip_grad_value_({p2}, clip_value);
  ASSERT_TRUE(torch::allclose(p1.grad(), p2.grad()));
}

TEST_F(NNUtilsTest, ConvertParameters) {
  std::vector<torch::Tensor> parameters{
      torch::arange(9, torch::kFloat32),
      torch::arange(9, torch::kFloat32).view({3, 3}),
      torch::arange(8, torch::kFloat32).view({2, 2, 2})};

  auto expected = torch::cat({
      torch::arange(9, torch::kFloat32),
      torch::arange(9, torch::kFloat32).view(-1),
      torch::arange(8, torch::kFloat32).view(-1)});
  auto vector = utils::parameters_to_vector(parameters);
  ASSERT_TRUE(vector.allclose(expected));

  std::vector<torch::Tensor> zero_parameters{
      torch::zeros({9}, torch::kFloat32),
      torch::zeros({9}, torch::kFloat32).view({3, 3}),
      torch::zeros({8}, torch::kFloat32).view({2, 2, 2})};

  utils::vector_to_parameters(vector, zero_parameters);
  for (int i = 0; i < zero_parameters.size(); ++i) {
    ASSERT_TRUE(zero_parameters[i].allclose(parameters[i]));
  }

  {
    auto conv1 = Conv2d(3, 10, 5);
    auto fc1 = Linear(10, 20);
    auto model = Sequential(conv1, fc1);

    auto vec = utils::parameters_to_vector(model->parameters());
    ASSERT_EQ(vec.size(0), 980);
  }
  {
    auto conv1 = Conv2d(3, 10, 5);
    auto fc1 = Linear(10, 20);
    auto model = Sequential(conv1, fc1);

    auto vec = torch::arange(0., 980);
    utils::vector_to_parameters(vec, model->parameters());

    auto sample = model->parameters()[0][0][0][0];
    ASSERT_TRUE(torch::equal(sample.data(), vec.data().slice(0, 0, 5)));
  }
}