Mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-07 00:21:07 +01:00)
Summary:
- add `pad_sequence` and tests
- related issue https://github.com/pytorch/pytorch/issues/25883

Pull Request resolved: https://github.com/pytorch/pytorch/pull/32387
Differential Revision: D20025421
Pulled By: yf225
fbshipit-source-id: caa9ae2114bece8db387a3a1610f24a3e06b1324
258 lines
8.2 KiB
C++
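Since the summary only names the new API, here is a minimal usage sketch of `torch::nn::utils::rnn::pad_sequence`, based on the calls exercised in the test file below. It is not part of the test file; it assumes a standard libtorch setup, and the commented output values mirror the expectations in the PadSequence test.

// Standalone sketch (assumption: libtorch is available and linked).
#include <torch/torch.h>
#include <iostream>

int main() {
  auto a = torch::tensor({1, 2, 3});
  auto b = torch::tensor({4, 5});
  auto c = torch::tensor({6});

  // batch_first = true, default padding_value = 0:
  // each row is one zero-padded sequence -> {{1, 2, 3}, {4, 5, 0}, {6, 0, 0}}
  auto padded = torch::nn::utils::rnn::pad_sequence({a, b, c}, /*batch_first=*/true);
  std::cout << padded << std::endl;

  // default batch_first = false puts the time dimension first (shape [3, 3] here)
  auto padded_time_major = torch::nn::utils::rnn::pad_sequence({a, b, c});
  std::cout << padded_time_major.sizes() << std::endl;

  // padding with a non-zero value
  auto padded_ones = torch::nn::utils::rnn::pad_sequence(
      {a, b, c}, /*batch_first=*/true, /*padding_value=*/1);
  std::cout << padded_ones << std::endl;
}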
#include <gtest/gtest.h>

#include <torch/torch.h>

#include <test/cpp/api/support.h>

#include <algorithm>
#include <random>

using namespace torch::nn;

struct NNUtilsTest : torch::test::SeedingFixture {};

// Reference helper: right-pads `tensor` with zeros along dim 0 up to `length`.
torch::Tensor PadSequence_pad(const torch::Tensor& tensor, int64_t length) {
  torch::NoGradGuard no_grad;
  std::vector<int64_t> tensor_sizes{length - tensor.size(0)};
  tensor_sizes.insert(
      tensor_sizes.end(),
      tensor.sizes().slice(1).begin(),
      tensor.sizes().slice(1).end());
  return torch::cat({tensor, torch::zeros(tensor_sizes, tensor.options())});
}

TEST_F(NNUtilsTest, PadSequence) {
  // single dimensional
  auto a = torch::tensor({1, 2, 3});
  auto b = torch::tensor({4, 5});
  auto c = torch::tensor({6});

  torch::Tensor expected, padded;

  // batch_first = true
  expected = torch::tensor({{4, 5, 0}, {1, 2, 3}, {6, 0, 0}});
  padded = utils::rnn::pad_sequence({b, a, c}, true);
  ASSERT_TRUE(padded.allclose(expected));

  // batch_first = false
  padded = utils::rnn::pad_sequence({b, a, c});
  ASSERT_TRUE(padded.allclose(expected.transpose(0, 1)));

  // pad with non-zero value
  expected = torch::tensor({{4, 5, 1}, {1, 2, 3}, {6, 1, 1}});
  padded = utils::rnn::pad_sequence({b, a, c}, true, 1);
  ASSERT_TRUE(padded.allclose(expected));

  // Test pad sorted sequence
  expected = torch::tensor({{1, 2, 3}, {4, 5, 0}, {6, 0, 0}});
  padded = utils::rnn::pad_sequence({a, b, c}, true);
  ASSERT_TRUE(padded.allclose(expected));

  // more dimensions
  int64_t maxlen = 9;
  for (int64_t num_dim : std::vector<int64_t>{0, 1, 2, 3}) {
    std::vector<torch::Tensor> sequences;
    std::vector<int64_t> trailing_dims(num_dim, 4);
    for (int64_t i = 1; i < maxlen + 1; i++) {
      int64_t seq_len = i * i;
      std::vector<int64_t> tensor_sizes{seq_len, 5};
      tensor_sizes.insert(
          tensor_sizes.end(),
          trailing_dims.begin(),
          trailing_dims.end());
      sequences.emplace_back(torch::rand(tensor_sizes));
    }
    std::shuffle(
        std::begin(sequences),
        std::end(sequences),
        std::default_random_engine{});
    std::vector<torch::Tensor> expected_tensors;
    for (const torch::Tensor& seq : sequences) {
      expected_tensors.emplace_back(PadSequence_pad(seq, maxlen * maxlen));
    }

    // batch first = true
    auto expected = torch::stack(expected_tensors);
    auto padded = utils::rnn::pad_sequence(sequences, true);
    ASSERT_TRUE(padded.allclose(expected));

    // batch first = false
    padded = utils::rnn::pad_sequence(sequences);
    ASSERT_TRUE(padded.allclose(expected.transpose(0, 1)));
  }
}

TEST_F(NNUtilsTest, ClipGradNorm) {
  auto l = Linear(10, 10);
  float max_norm = 2;
  // Computes the total gradient norm of the layer's parameters for the given
  // norm type (max of absolute values for the infinity norm).
  auto compute_norm = [&](float norm_type) -> float {
    float total_norm = 0.0;
    if (norm_type != std::numeric_limits<float>::infinity()) {
      for (const auto& p : l->parameters()) {
        total_norm +=
            p.grad().data().abs().pow(norm_type).sum().item().toFloat();
      }
      return std::pow(total_norm, 1.0 / norm_type);
    } else {
      for (const auto& p : l->parameters()) {
        auto param_max = p.grad().data().abs().max().item().toFloat();
        if (param_max > total_norm) {
          total_norm = param_max;
        }
      }
      return total_norm;
    }
  };
  // Element-wise ratio between the current (possibly clipped) gradients and
  // the original ones; a zero standard deviation means uniform scaling.
  auto compare_scaling =
      [&](const std::vector<torch::Tensor>& grads) -> torch::Tensor {
    std::vector<torch::Tensor> p_scale;
    for (int i = 0; i < grads.size(); i++) {
      auto param = l->parameters()[i];
      auto grad = grads[i];
      p_scale.push_back(param.grad().data().div(grad).view(-1));
    }
    auto scale = torch::cat(p_scale);
    return scale; // need to assert std is 0.
  };

  std::vector<torch::Tensor> grads = {
      torch::arange(1.0, 101).view({10, 10}),
      torch::ones({10}).div(1000),
  };
  std::vector<float> norm_types = {
      0.5,
      1.5,
      2.0,
      4.0,
      std::numeric_limits<float>::infinity(),
  };
  for (auto norm_type : norm_types) {
    for (int i = 0; i < grads.size(); i++) {
      l->parameters()[i].grad() =
          grads[i].clone().view_as(l->parameters()[i].data());
    }
    auto norm_before = compute_norm(norm_type);
    auto norm = utils::clip_grad_norm_(l->parameters(), max_norm, norm_type);
    auto norm_after = compute_norm(norm_type);
    ASSERT_FLOAT_EQ(norm, norm_before);
    ASSERT_FLOAT_EQ(norm_after, max_norm);
    ASSERT_LE(norm_after, max_norm);
    auto scaled = compare_scaling(grads);
    ASSERT_NEAR(0, scaled.std().item().toFloat(), 1e-7);
  }

  // Small gradients should be left unchanged
  grads = {
      torch::rand({10, 10}).div(10000),
      torch::ones(10).div(500),
  };
  for (auto norm_type : norm_types) {
    for (int i = 0; i < grads.size(); i++) {
      l->parameters()[i].grad().data().copy_(grads[i]);
    }
    auto norm_before = compute_norm(norm_type);
    auto norm = utils::clip_grad_norm_(l->parameters(), max_norm, norm_type);
    auto norm_after = compute_norm(norm_type);
    ASSERT_FLOAT_EQ(norm, norm_before);
    ASSERT_FLOAT_EQ(norm_before, norm_after);
    ASSERT_LE(norm_after, max_norm);
    auto scaled = compare_scaling(grads);
    ASSERT_NEAR(0, scaled.std().item().toFloat(), 1e-7);
    ASSERT_EQ(scaled[0].item().toFloat(), 1);
  }

  // should accept a single tensor as input
  auto p1 = torch::randn({10, 10});
  auto p2 = torch::randn({10, 10});
  auto g = torch::arange(1., 101).view({10, 10});
  p1.grad() = g.clone();
  p2.grad() = g.clone();
  for (const auto norm_type : norm_types) {
    utils::clip_grad_norm_(p1, max_norm, norm_type);
    utils::clip_grad_norm_({p2}, max_norm, norm_type);
    ASSERT_TRUE(torch::allclose(p1.grad(), p2.grad()));
  }
}

TEST_F(NNUtilsTest, ClipGradValue) {
  auto l = Linear(10, 10);
  float clip_value = 2.5;

  torch::Tensor grad_w = torch::arange(-50., 50).view({10, 10}).div_(5);
  torch::Tensor grad_b = torch::ones({10}).mul_(2);
  // Exercise both a full set of gradients and one where a gradient is
  // undefined (an empty Tensor), which clipping should skip.
  std::vector<std::vector<torch::Tensor>> grad_lists = {
      {grad_w, grad_b}, {grad_w, torch::Tensor()}};
  for (auto grad_list : grad_lists) {
    for (int i = 0; i < grad_list.size(); i++) {
      auto p = l->parameters()[i];
      auto g = grad_list[i];
      p.grad() = g.defined() ? g.clone().view_as(p.data()) : g;
    }

    utils::clip_grad_value_(l->parameters(), clip_value);
    for (const auto& p : l->parameters()) {
      if (p.grad().defined()) {
        ASSERT_LE(
            p.grad().data().max().item().toFloat(), clip_value);
        ASSERT_GE(
            p.grad().data().min().item().toFloat(), -clip_value);
      }
    }
  }

  // Should accept a single Tensor as input
  auto p1 = torch::randn({10, 10});
  auto p2 = torch::randn({10, 10});
  auto g = torch::arange(-50., 50).view({10, 10}).div_(5);
  p1.grad() = g.clone();
  p2.grad() = g.clone();
  utils::clip_grad_value_(p1, clip_value);
  utils::clip_grad_value_({p2}, clip_value);
  ASSERT_TRUE(torch::allclose(p1.grad(), p2.grad()));
}

TEST_F(NNUtilsTest, ConvertParameters) {
  std::vector<torch::Tensor> parameters{
      torch::arange(9, torch::kFloat32),
      torch::arange(9, torch::kFloat32).view({3, 3}),
      torch::arange(8, torch::kFloat32).view({2, 2, 2})
  };

  // parameters_to_vector flattens each parameter and concatenates them into a
  // single 1-D tensor.
  auto expected = torch::cat({
      torch::arange(9, torch::kFloat32),
      torch::arange(9, torch::kFloat32).view(-1),
      torch::arange(8, torch::kFloat32).view(-1)
  });
  auto vector = utils::parameters_to_vector(parameters);
  ASSERT_TRUE(vector.allclose(expected));

  std::vector<torch::Tensor> zero_parameters{
      torch::zeros({9}, torch::kFloat32),
      torch::zeros({9}, torch::kFloat32).view({3, 3}),
      torch::zeros({8}, torch::kFloat32).view({2, 2, 2})
  };

  // vector_to_parameters writes the flat vector back into the
  // (zero-initialized) parameters, restoring the original values.
  utils::vector_to_parameters(vector, zero_parameters);
  for (int i = 0; i < zero_parameters.size(); ++i) {
    ASSERT_TRUE(zero_parameters[i].allclose(parameters[i]));
  }

  {
    auto conv1 = Conv2d(3, 10, 5);
    auto fc1 = Linear(10, 20);
    auto model = Sequential(conv1, fc1);

    // Conv2d(3, 10, 5): 10*3*5*5 + 10 = 760 parameters;
    // Linear(10, 20): 10*20 + 20 = 220; total 980.
    auto vec = utils::parameters_to_vector(model->parameters());
    ASSERT_EQ(vec.size(0), 980);
  }
  {
    auto conv1 = Conv2d(3, 10, 5);
    auto fc1 = Linear(10, 20);
    auto model = Sequential(conv1, fc1);

    auto vec = torch::arange(0., 980);
    utils::vector_to_parameters(vec, model->parameters());

    // The first five entries of the conv weight should now equal vec[0:5].
    auto sample = model->parameters()[0][0][0][0];
    ASSERT_TRUE(torch::equal(sample.data(), vec.data().slice(0, 0, 5)));
  }
}