Add partition info message to NetDef (#33616)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/33616

Att. We start by assigning a `node_name` to the DeviceOption of each op in the net. Then, for each unique node_name, we have a PartitionInfo describing the partition, including the logical devices it can be assigned to; the link between the two is established by partition name.

Test Plan:
unittests

Canaries:
AF: https://our.intern.facebook.com/intern/ads/canary/424817103900710410
AI: https://our.intern.facebook.com/intern/ads/canary/424737510862189908

Reviewed By: ipiszy, bangshengtang, jfix71

Differential Revision: D20015493

fbshipit-source-id: 0bb0f30cfc3892f7b8709d87b8bc1fbab2f2c46d
This commit is contained in:
Yinghai Lu 2020-02-26 14:50:10 -08:00 committed by Facebook Github Bot
parent 51e405743f
commit 04f88a3a7b

View File

@ -316,6 +316,20 @@ message OperatorDef {
optional int64 op_version = 12;
}
// PartitionInfo describes one partition of a net: its name, the logical
// devices it is allowed to run on, and optional debugging text.
//
// Per the change that introduced this message, ops are associated with a
// partition by name: the `node_name` set in an op's DeviceOption is matched
// against `name` below.
message PartitionInfo {
  // Partition name. This is the key used to look a partition up.
  // NOTE(review): `required` cannot be relaxed later without affecting
  // readers built against this schema; it is kept unchanged here.
  required string name = 1;

  // Logical device IDs on which this partition may be executed.
  // An empty list means the partition does not run on a device and is
  // executed on the host CPU instead.
  repeated int32 device_id = 2;

  // Free-form extra information, intended for debugging.
  optional string extra_info = 3;
}
// Network definition.
message NetDef {
optional string name = 1; // the network's name
@ -356,8 +370,12 @@ message NetDef {
// blobs' contents may be overwritten.
repeated string external_input = 7;
repeated string external_output = 8;
// Partitioning info, indexed by partition names.
repeated PartitionInfo partition_info = 9;
}
// ExecutionStep is actually a sort-of-hacky way we simulate iteration right
// now.
message ExecutionStep {