mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Fix warnings exposed by gcc-4.9.x's -Wshadow-compatible-local I plan to enable this for all of fbcode, soon. See t13698406 for justification. Rename outer "rank" to "rank0" (to avoid shadowing another "rank" just below). Also rename outer "size" to "size0" for the same reason. This avoids the following errors: caffe2/caffe2/mpi/mpi_gpu_test.cc:132:9: error: declaration of 'rank' shadows a previous local [-Werror=shadow-compatible-local] caffe2/caffe2/mpi/mpi_gpu_test.cc:120:7: error: shadowed declaration is here [-Werror=shadow-compatible-local] caffe2/caffe2/mpi/mpi_gpu_test.cc:134:9: error: declaration of 'size' shadows a previous local [-Werror=shadow-compatible-local] caffe2/caffe2/mpi/mpi_gpu_test.cc:123:7: error: shadowed declaration is here [-Werror=shadow-compatible-local] Reviewed By: Yangqing Differential Revision: D4544806 fbshipit-source-id: 4cfa412dd672919174d487e60aa503a32125da03
348 lines | 7.9 KiB | C++
#include <iostream>

#include "caffe2/core/context_gpu.h"
#include "caffe2/core/init.h"
#include "caffe2/core/net.h"
#include "caffe2/core/operator.h"
#include "caffe2/mpi/mpi_common.h"
#include "google/protobuf/text_format.h"
#include "gtest/gtest.h"
|
|
|
|
// Command-line flag giving the root directory for caffe test data.
// Not referenced elsewhere in this file; registered so the test binary
// accepts the same flags as the other caffe2 tests.
CAFFE2_DEFINE_string(
    caffe_test_root, "gen/", "The root of the caffe test folder.");
|
|
|
|
namespace caffe2 {
|
|
|
|
const char kBcastNet[] = R"NET(
|
|
name: "bcast"
|
|
op {
|
|
output: "comm"
|
|
type: "CreateCommonWorld"
|
|
engine: "MPI"
|
|
}
|
|
op {
|
|
output: "X"
|
|
type: "ConstantFill"
|
|
arg {
|
|
name: "shape"
|
|
ints: 10
|
|
}
|
|
arg {
|
|
name: "value"
|
|
f: 0.0
|
|
}
|
|
}
|
|
op {
|
|
input: "comm"
|
|
input: "X"
|
|
output: "X"
|
|
type: "Broadcast"
|
|
engine: "MPI"
|
|
arg {
|
|
name: "root"
|
|
i: 0
|
|
}
|
|
}
|
|
device_option {
|
|
device_type: 1
|
|
}
|
|
)NET";
|
|
|
|
TEST(MPITest, TestMPIBroadcast) {
  // Build the net from its text-format description.
  NetDef net_def;
  CHECK(google::protobuf::TextFormat::ParseFromString(
      string(kBcastNet), &net_def));
  // Seed each process's "X" with its own MPI rank so we can tell whose
  // data survived the broadcast.
  int my_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  int comm_size;
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
  auto* fill_value = net_def.mutable_op(1)->mutable_arg(1);
  CAFFE_ENFORCE_EQ(fill_value->name(), "value");
  fill_value->set_f(my_rank);

  // Exercise every process as the broadcast root.
  for (int root = 0; root < comm_size; ++root) {
    net_def.mutable_op(2)->mutable_arg(0)->set_i(root);
    Workspace ws;
    unique_ptr<NetBase> net(CreateNet(net_def, &ws));
    EXPECT_NE(nullptr, net.get());
    EXPECT_TRUE(net->Run());
    // After the broadcast every process must hold the root's fill value.
    const auto& X = ws.GetBlob("X")->Get<TensorCUDA>();
    EXPECT_EQ(X.size(), 10);
    TensorCPU X_cpu(X);
    for (int i = 0; i < X.size(); ++i) {
      EXPECT_EQ(X_cpu.data<float>()[i], root);
    }
  }
}
|
|
|
|
const char kReduceNet[] = R"NET(
|
|
name: "reduce"
|
|
op {
|
|
output: "comm"
|
|
type: "CreateCommonWorld"
|
|
engine: "MPI"
|
|
}
|
|
op {
|
|
output: "X"
|
|
type: "ConstantFill"
|
|
arg {
|
|
name: "shape"
|
|
ints: 10
|
|
}
|
|
arg {
|
|
name: "value"
|
|
f: 0.0
|
|
}
|
|
}
|
|
op {
|
|
input: "comm"
|
|
input: "X"
|
|
output: "X_reduced"
|
|
type: "Reduce"
|
|
engine: "MPI"
|
|
arg {
|
|
name: "root"
|
|
i: 0
|
|
}
|
|
}
|
|
device_option {
|
|
device_type: 1
|
|
}
|
|
)NET";
|
|
|
|
TEST(MPITest, TestMPIReduce) {
  NetDef net_def;
  CHECK(google::protobuf::TextFormat::ParseFromString(
      string(kReduceNet), &net_def));
  // Let's set the network's constant fill value to be the mpi rank.
  auto* arg = net_def.mutable_op(1)->mutable_arg(1);
  CAFFE_ENFORCE_EQ(arg->name(), "value");
  // Query rank/size once up front. They are constant for MPI_COMM_WORLD,
  // so re-querying them inside the loop (as the old code did with a second
  // shadowing pair of rank/size locals) was redundant; hoisting them also
  // removes the need for the awkward rank0/size0 workaround names.
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  arg->set_f(rank);
  int size;
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // Exercise every process as the reduce root.
  for (int root = 0; root < size; ++root) {
    net_def.mutable_op(2)->mutable_arg(0)->set_i(root);
    Workspace ws;
    unique_ptr<NetBase> net(CreateNet(net_def, &ws));
    EXPECT_NE(nullptr, net.get());
    EXPECT_TRUE(net->Run());
    if (rank == root) {
      // Only the root receives the reduced result. Every element must be
      // the sum of all ranks: 0 + 1 + ... + (size - 1).
      auto& X = ws.GetBlob("X_reduced")->Get<TensorCUDA>();
      EXPECT_EQ(X.size(), 10);
      int expected_result = size * (size - 1) / 2;
      TensorCPU X_cpu(X);
      for (int i = 0; i < X.size(); ++i) {
        EXPECT_EQ(X_cpu.data<float>()[i], expected_result);
      }
    }
  }
}
|
|
|
|
const char kMPIAllgatherNet[] = R"NET(
|
|
name: "allgather"
|
|
op {
|
|
output: "comm"
|
|
type: "CreateCommonWorld"
|
|
engine: "MPI"
|
|
}
|
|
op {
|
|
output: "X"
|
|
type: "ConstantFill"
|
|
arg {
|
|
name: "shape"
|
|
ints: 2
|
|
ints: 10
|
|
}
|
|
arg {
|
|
name: "value"
|
|
f: 0.0
|
|
}
|
|
}
|
|
op {
|
|
input: "comm"
|
|
input: "X"
|
|
output: "X_gathered"
|
|
engine: "MPI"
|
|
type: "Allgather"
|
|
}
|
|
device_option {
|
|
device_type: 1
|
|
}
|
|
)NET";
|
|
|
|
TEST(MPITest, TestMPIAllgather) {
  NetDef net_def;
  CHECK(google::protobuf::TextFormat::ParseFromString(
      string(kMPIAllgatherNet), &net_def));
  // Each process fills its 2x10 "X" with its own rank so the gathered
  // result identifies which chunk came from which process.
  int my_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  int comm_size;
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
  auto* fill_value = net_def.mutable_op(1)->mutable_arg(1);
  CAFFE_ENFORCE_EQ(fill_value->name(), "value");
  fill_value->set_f(my_rank);

  Workspace ws;
  unique_ptr<NetBase> net(CreateNet(net_def, &ws));
  EXPECT_NE(nullptr, net.get());
  EXPECT_TRUE(net->Run());
  // The local input must be left untouched by the gather.
  const auto& X = ws.GetBlob("X")->Get<TensorCUDA>();
  EXPECT_EQ(X.size(), 20);
  TensorCPU X_cpu(X);
  for (int i = 0; i < X.size(); ++i) {
    EXPECT_EQ(X_cpu.data<float>()[i], my_rank);
  }
  // Gathering stacks the per-rank 2x10 chunks along dim 0 in rank order,
  // so flat element i belongs to rank i / 20.
  const auto& X_gathered = ws.GetBlob("X_gathered")->Get<TensorCUDA>();
  EXPECT_EQ(X_gathered.size(), 20 * comm_size);
  EXPECT_EQ(X_gathered.dim(0), 2 * comm_size);
  EXPECT_EQ(X_gathered.dim(1), 10);
  TensorCPU X_gathered_cpu(X_gathered);
  for (int i = 0; i < X_gathered.size(); ++i) {
    EXPECT_EQ(X_gathered_cpu.data<float>()[i], i / 20);
  }
}
|
|
|
|
const char kMPIAllreduceNet[] = R"NET(
|
|
name: "allreduce"
|
|
op {
|
|
output: "comm"
|
|
type: "CreateCommonWorld"
|
|
engine: "MPI"
|
|
}
|
|
op {
|
|
output: "X"
|
|
type: "ConstantFill"
|
|
arg {
|
|
name: "shape"
|
|
ints: 10
|
|
}
|
|
arg {
|
|
name: "value"
|
|
f: 0.0
|
|
}
|
|
}
|
|
op {
|
|
input: "comm"
|
|
input: "X"
|
|
output: "X_reduced"
|
|
type: "Allreduce"
|
|
engine: "MPI"
|
|
}
|
|
device_option {
|
|
device_type: 1
|
|
}
|
|
)NET";
|
|
|
|
TEST(MPITest, TestMPIAllreduce) {
  NetDef net_def;
  CHECK(google::protobuf::TextFormat::ParseFromString(
      string(kMPIAllreduceNet), &net_def));
  // Each process contributes its own rank as the fill value.
  int my_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  int comm_size;
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
  auto* fill_value = net_def.mutable_op(1)->mutable_arg(1);
  CAFFE_ENFORCE_EQ(fill_value->name(), "value");
  fill_value->set_f(my_rank);

  Workspace ws;
  unique_ptr<NetBase> net(CreateNet(net_def, &ws));
  EXPECT_NE(nullptr, net.get());
  EXPECT_TRUE(net->Run());
  // The input blob must be untouched by the out-of-place allreduce.
  const auto& X = ws.GetBlob("X")->Get<TensorCUDA>();
  EXPECT_EQ(X.size(), 10);
  TensorCPU X_cpu(X);
  for (int i = 0; i < X.size(); ++i) {
    EXPECT_EQ(X_cpu.data<float>()[i], my_rank);
  }
  // Every process receives the sum of all ranks: 0 + 1 + ... + (size - 1).
  const auto& X_reduced = ws.GetBlob("X_reduced")->Get<TensorCUDA>();
  EXPECT_EQ(X_reduced.size(), 10);
  const int expected_sum = comm_size * (comm_size - 1) / 2;
  TensorCPU X_reduced_cpu(X_reduced);
  for (int i = 0; i < X_reduced.size(); ++i) {
    EXPECT_EQ(X_reduced_cpu.data<float>()[i], expected_sum);
  }
}
|
|
|
|
const char kInPlaceMPIAllreduceNet[] = R"NET(
|
|
name: "allreduce"
|
|
op {
|
|
output: "comm"
|
|
type: "CreateCommonWorld"
|
|
engine: "MPI"
|
|
}
|
|
op {
|
|
output: "X"
|
|
type: "ConstantFill"
|
|
arg {
|
|
name: "shape"
|
|
ints: 10
|
|
}
|
|
arg {
|
|
name: "value"
|
|
f: 0.0
|
|
}
|
|
}
|
|
op {
|
|
input: "comm"
|
|
input: "X"
|
|
output: "X"
|
|
type: "Allreduce"
|
|
engine: "MPI"
|
|
}
|
|
device_option {
|
|
device_type: 1
|
|
}
|
|
)NET";
|
|
|
|
TEST(MPITest, TestInPlaceMPIAllreduce) {
  NetDef net_def;
  CHECK(google::protobuf::TextFormat::ParseFromString(
      string(kInPlaceMPIAllreduceNet), &net_def));
  // Fill each process's copy of "X" with its MPI rank.
  int my_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  int comm_size;
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
  auto* fill_value = net_def.mutable_op(1)->mutable_arg(1);
  CAFFE_ENFORCE_EQ(fill_value->name(), "value");
  fill_value->set_f(my_rank);

  Workspace ws;
  unique_ptr<NetBase> net(CreateNet(net_def, &ws));
  EXPECT_NE(nullptr, net.get());
  EXPECT_TRUE(net->Run());
  // "X" is reduced in place: every element becomes the sum of all ranks,
  // i.e. 0 + 1 + ... + (size - 1).
  const auto& X_reduced = ws.GetBlob("X")->Get<TensorCUDA>();
  EXPECT_EQ(X_reduced.size(), 10);
  const int expected_sum = comm_size * (comm_size - 1) / 2;
  TensorCPU X_reduced_cpu(X_reduced);
  for (int i = 0; i < X_reduced.size(); ++i) {
    EXPECT_EQ(X_reduced_cpu.data<float>()[i], expected_sum);
  }
}
|
|
|
|
} // namespace caffe2
|
|
|
|
|
|
// Test entry point: initializes MPI (with threading), gtest, and caffe2,
// runs all tests, and finalizes MPI before returning the gtest result.
GTEST_API_ int main(int argc, char **argv) {
  int mpi_ret;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &mpi_ret);
  // MPI_Init_thread reports the thread support it actually granted, which
  // may be lower than the MPI_THREAD_MULTIPLE we requested. The old code
  // ignored it; fail fast instead of running the tests with an MPI library
  // that cannot be called safely from multiple threads.
  if (mpi_ret < MPI_THREAD_SERIALIZED) {
    std::cerr << "This test requires the MPI implementation to support at "
              << "least MPI_THREAD_SERIALIZED thread level." << std::endl;
    MPI_Finalize();
    return 1;
  }
  testing::InitGoogleTest(&argc, argv);
  caffe2::GlobalInit(&argc, &argv);
  int test_result = RUN_ALL_TESTS();
  MPI_Finalize();
  return test_result;
}
|