#include "caffe2/core/operator.h"
|
|
#include "caffe2/operators/no_default_engine_op.h"
|
|
|
|
namespace caffe2 {
|
|
|
|

OPERATOR_SCHEMA(CreateCommonWorld)
    .NumInputs(0, 1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Creates a common world for communication operators.
)DOC")
    .Input(0, "kv_handler", "Key/value handler for rendezvous (optional).")
    .Output(0, "comm_world", "A common world for collective operations.")
    .Arg("size", "(int) size of the common world.")
    .Arg("rank", "(int) rank of this node in the common world.");
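
// Editor's sketch (not in the original file): one way to instantiate this
// schema. Since no default engine is registered (see the bottom of this
// file), the OperatorDef must name a concrete engine; "GLOO" and the blob
// names below are illustrative assumptions.
//
//   OperatorDef def;
//   def.set_type("CreateCommonWorld");
//   def.set_engine("GLOO");               // assumed engine choice
//   def.add_input("store_handler");       // optional kv_handler (input 0)
//   def.add_output("comm_world");         // output 0
//   AddArgument<int>("size", 2, &def);    // a two-node common world
//   AddArgument<int>("rank", 0, &def);    // this node's rank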

OPERATOR_SCHEMA(CloneCommonWorld)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Clones an existing common world.
)DOC")
    .Input(0, "existing_comm_world", "Existing common world to clone.")
    .Output(0, "comm_world", "A common world for collective operations.");

OPERATOR_SCHEMA(DestroyCommonWorld)
    .NumInputs(1)
    .NumOutputs(1)
    .EnforceInplace({{0, 0}})
    .SetDoc("Closes all connections managed by a common world.")
    .Input(0, "common_world", "The common world to be destroyed.");

OPERATOR_SCHEMA(Broadcast)
    .NumInputsOutputs([](int in, int out) {
      return in >= 2 && out == (in - 1);
    })
    .EnforceInplace([](int in, int out) { return (in - 1) == out; })
    .InputsCanCrossDevices()
    .IdenticalTypeAndShapeOfInput(0)
    .SetDoc(R"DOC(
Does a broadcast operation from the root node to every other node. The tensor
on each node should have been pre-created with the same shape and data type.
)DOC")
    .Input(0, "comm_world", "The common world.")
    .Input(1, "X", "A tensor to be broadcasted.")
    .Output(0, "X", "In-place as input 1.")
    .Arg("root", "(int, default 0) the root to run broadcast from.");
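
// Editor's sketch (not in the original file): a def satisfying the
// constraints above for the single-tensor case (in == 2, out == 1, X
// broadcast in place); engine and blob names are illustrative assumptions.
//
//   OperatorDef def;
//   def.set_type("Broadcast");
//   def.set_engine("GLOO");            // assumed; no default engine exists
//   def.add_input("comm_world");       // input 0
//   def.add_input("X");                // input 1: pre-created on every node
//   def.add_output("X");               // output 0 aliases input 1 (in-place)
//   AddArgument<int>("root", 0, &def); // broadcast from rank 0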

OPERATOR_SCHEMA(Reduce)
    .NumInputs(2)
    .NumOutputs(1)
    .InputsCanCrossDevices()
    .IdenticalTypeAndShapeOfInput(0)
    .SetDoc(R"DOC(
Does a reduce operation from every node to the root node. Currently only
Sum is supported.
)DOC")
    .Input(0, "comm_world", "The common world.")
    .Input(1, "X", "A tensor to be reduced.")
    .Output(0, "Y", "The reduced result on root, not set for other nodes.")
    .Arg("root", "(int, default 0) the root to run reduce into.");

OPERATOR_SCHEMA(Allreduce)
    .NumInputsOutputs([](int in, int out) {
      return in >= 2 && out == (in - 1);
    })
    .EnforceInplace([](int in, int out) { return (in - 1) == out; })
    .IdenticalTypeAndShapeOfInput(0)
    .InputsCanCrossDevices()
    .SetDoc(R"DOC(
Does an allreduce operation among the nodes. Currently only Sum is supported.
)DOC")
    .Input(0, "comm_world", "The common world.")
    .Input(1, "X", "A tensor to be allreduced.")
    .Output(0, "Y", "The allreduced tensor, same on all nodes.");
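
// Editor's note (not in the original file): the constraints above imply the
// following wiring for a two-tensor allreduce (illustrative blob names):
//
//   inputs:  {"comm_world", "X0", "X1"}   // in == 3
//   outputs: {"X0", "X1"}                 // out == in - 1, each in place
//
// i.e. tensor input i (i >= 1) is summed across nodes into output i - 1, so
// every node ends up holding identical reduced tensors.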

OPERATOR_SCHEMA(ReduceScatter)
    .NumInputsOutputs([](int in, int out) {
      return in >= 2 && out == (in - 1);
    })
    .EnforceInplace([](int in, int out) { return (in - 1) == out; })
    .IdenticalTypeAndShapeOfInput(0)
    .InputsCanCrossDevices()
    .SetDoc(R"DOC(
Does a reduce-scatter operation among the nodes. Currently only Sum is
supported.
)DOC")
    .Input(0, "comm_world", "The common world.")
    .Input(1, "X", "A tensor to be reduce-scattered.")
    .Output(0, "Y", "The reduced tensor, scattered on all nodes.");

OPERATOR_SCHEMA(Allgather)
    .NumInputs(2, INT_MAX)
    .NumOutputs(1)
    .InputsCanCrossDevices()
    .SetDoc(R"DOC(
Does an allgather operation among the nodes.
)DOC")
    .Input(0, "comm_world", "The common world.")
    .Input(1, "X", "A tensor to be allgathered.")
    .Output(0, "Y", "The allgathered tensor, same on all nodes.");

OPERATOR_SCHEMA(Barrier)
    .NumInputs(1)
    .SetDoc(R"DOC(
Does a barrier operation among the nodes.
)DOC")
    .Input(0, "comm_world", "The common world.");

OPERATOR_SCHEMA(SendTensor)
    .NumInputs({2, 4})
    .NumOutputs(0)
    .SetDoc(R"DOC(
Sends the tensor to another node.
)DOC")
    .Input(0, "comm_world", "The common world.")
    .Input(1, "X", "A tensor to be sent.")
    .Input(
        2,
        "dst",
        "An int CPUTensor of size 1 specifying the rank. If "
        "given, this overrides the 'dst' argument of the op.")
    .Input(
        3,
        "tag",
        "An int CPUTensor of size 1 specifying the tag to "
        "send the tensor with. This overrides the 'tag' "
        "argument of the op.")
    .Arg("dst", "(int) the rank to send the tensor to.")
    .Arg("tag", "(int) a tag to send the tensor with.")
    .Arg(
        "raw_buffer",
        "(bool) if set, only send the content and assume that the receiver "
        "already knows the tensor's shape and type.");
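
// Editor's sketch (not in the original file): the minimal two-input form,
// with the destination given as an argument rather than as input 2; engine
// and blob names are illustrative assumptions.
//
//   OperatorDef def;
//   def.set_type("SendTensor");
//   def.set_engine("GLOO");             // assumed engine choice
//   def.add_input("comm_world");        // input 0
//   def.add_input("X");                 // input 1: tensor to send
//   AddArgument<int>("dst", 1, &def);   // send to rank 1
//   AddArgument<int>("tag", 0, &def);   // message tag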

OPERATOR_SCHEMA(ReceiveTensor)
    .NumInputs({2, 4})
    .NumOutputs(3)
    .EnforceInplace({{1, 0}})
    .AllowInplace({{2, 1}, {3, 2}})
    .SetDoc(R"DOC(
Receives the tensor from another node.
)DOC")
    .Input(0, "comm_world", "The common world.")
    .Input(
        1,
        "Y",
        "In-place output. If raw_buffer is specified, "
        "Y should have pre-allocated data and type.")
    .Input(
        2,
        "src",
        "An int CPUTensor of size 1 specifying the rank. If "
        "given, this overrides the 'src' argument of the op.")
    .Input(
        3,
        "tag",
        "An int CPUTensor of size 1 specifying the tag to "
        "receive the tensor with. This overrides the 'tag' "
        "argument of the op.")
    .Output(0, "Y", "The received tensor.")
    .Output(
        1,
        "src",
        "The sender that sent the message as a CPUTensor "
        "of size 1 and of type int.")
    .Output(
        2,
        "tag",
        "The tag that the message is sent with as a CPUTensor "
        "of size 1 and of type int.")
    .Arg("src", "(int) the rank to receive the tensor from.")
    .Arg("tag", "(int) a tag to receive the tensor with.")
    .Arg(
        "raw_buffer",
        "(bool) if set, only receive the content and assume that this node "
        "already knows the tensor's shape and type.");
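
// Editor's sketch (not in the original file): the receive side matching the
// SendTensor sketch above. Note EnforceInplace({{1, 0}}): the blob passed as
// input Y is also output 0. Engine and blob names are illustrative
// assumptions.
//
//   OperatorDef def;
//   def.set_type("ReceiveTensor");
//   def.set_engine("GLOO");             // assumed engine choice
//   def.add_input("comm_world");        // input 0
//   def.add_input("Y");                 // input 1: received into in place
//   def.add_output("Y");                // output 0 aliases input 1
//   def.add_output("src");              // output 1: sender rank
//   def.add_output("tag");              // output 2: message tag
//   AddArgument<int>("src", 1, &def);   // expect the tensor from rank 1
//   AddArgument<int>("tag", 0, &def);   // matching tag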

SHOULD_NOT_DO_GRADIENT(CreateCommonWorld);
SHOULD_NOT_DO_GRADIENT(CloneCommonWorld);
SHOULD_NOT_DO_GRADIENT(DestroyCommonWorld);
SHOULD_NOT_DO_GRADIENT(Broadcast);
SHOULD_NOT_DO_GRADIENT(Reduce);
SHOULD_NOT_DO_GRADIENT(Allgather);
SHOULD_NOT_DO_GRADIENT(Allreduce);
SHOULD_NOT_DO_GRADIENT(ReduceScatter);
SHOULD_NOT_DO_GRADIENT(Barrier);
SHOULD_NOT_DO_GRADIENT(SendTensor);
SHOULD_NOT_DO_GRADIENT(ReceiveTensor);

// Communication operators do not have default engines.
REGISTER_CPU_OPERATOR(CreateCommonWorld, NoDefaultEngineOp<CPUContext>);
REGISTER_CPU_OPERATOR(CloneCommonWorld, NoDefaultEngineOp<CPUContext>);
REGISTER_CPU_OPERATOR(DestroyCommonWorld, NoDefaultEngineOp<CPUContext>);
REGISTER_CPU_OPERATOR(Broadcast, NoDefaultEngineOp<CPUContext>);
REGISTER_CPU_OPERATOR(Reduce, NoDefaultEngineOp<CPUContext>);
REGISTER_CPU_OPERATOR(Allgather, NoDefaultEngineOp<CPUContext>);
REGISTER_CPU_OPERATOR(Allreduce, NoDefaultEngineOp<CPUContext>);
REGISTER_CPU_OPERATOR(ReduceScatter, NoDefaultEngineOp<CPUContext>);
REGISTER_CPU_OPERATOR(Barrier, NoDefaultEngineOp<CPUContext>);
REGISTER_CPU_OPERATOR(SendTensor, NoDefaultEngineOp<CPUContext>);
REGISTER_CPU_OPERATOR(ReceiveTensor, NoDefaultEngineOp<CPUContext>);
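
// Editor's note (not in the original file): concrete backends register these
// ops under an explicit engine name elsewhere in the tree, along the lines
// of the sketch below; the op class named here is an illustrative assumption.
//
//   REGISTER_CPU_OPERATOR_WITH_ENGINE(
//       Allreduce, GLOO, GlooAllreduceOp<CPUContext>);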

} // namespace caffe2