Mirror of https://github.com/zebrajr/pytorch.git, synced 2025-12-06 12:20:52 +01:00.
Summary: This is an experimental build on top of what orionr and mingzhe09088 built. Essentially, the idea is that we will need separate *_API versions for different shared libraries. If this theory is right, I'll try to clean up the design a bit and document it properly. Pull Request resolved: https://github.com/pytorch/pytorch/pull/11266 Reviewed By: orionr Differential Revision: D9682942 Pulled By: Yangqing fbshipit-source-id: c79653199e67a1500c9174f39f8b0357324763f3
87 lines · 2.9 KiB · C++
#include "caffe2/mpi/mpi_ops.h"

#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/operator_fallback_gpu.h"

namespace caffe2 {

// Here is a bunch of MPI macro definitions that allow us to see if the MPI
// version supports CUDA aware MPI functions or not.

#if OPEN_MPI
// Encode the OpenMPI version as one comparable integer, e.g. 2.1.3 -> 20103.
// Fully parenthesized so the macro expands safely inside any expression
// (the original relied on operator precedence working out in `#if` tests).
#define CAFFE2_OMPI_VERSION                                  \
  ((OMPI_MAJOR_VERSION) * 10000 + (OMPI_MINOR_VERSION) * 100 + \
   (OMPI_RELEASE_VERSION))
#if CAFFE2_OMPI_VERSION >= 20000
// OpenMPI 2.x now supports compile time check whether CUDA is supported.
#include "mpi-ext.h" /* Needed for CUDA-aware check */
#if MPIX_CUDA_AWARE_SUPPORT
#define CAFFE2_HAS_CUDA_MPI_BASICS 1
#define CAFFE2_HAS_CUDA_MPI_ALLREDUCE 1
#endif // MPIX_CUDA_AWARE_SUPPORT
// Note: if MPIX_CUDA_AWARE_SUPPORT is false the two macros stay undefined,
// which the `#if` tests below treat as 0 (fallback path).
#else // CAFFE2_OMPI_VERSION >= 20000
// In the case of OpenMPI 1.x, we don't have compile-time flags to
// figure out if CUDA is supported; as a result, we will assume that
// the user has built OpenMPI with CUDA support.
// CUDA-aware MPIBroadcast is introduced after OpenMPI 1.7.
#if CAFFE2_OMPI_VERSION >= 10700
#define CAFFE2_HAS_CUDA_MPI_BASICS 1
#else // CAFFE2_OMPI_VERSION >= 10700
#define CAFFE2_HAS_CUDA_MPI_BASICS 0
#endif // CAFFE2_OMPI_VERSION >= 10700
// CUDA-aware MPIAllreduce is introduced after OpenMPI 1.8.5.
#if CAFFE2_OMPI_VERSION >= 10805
#define CAFFE2_HAS_CUDA_MPI_ALLREDUCE 1
#else // CAFFE2_OMPI_VERSION >= 10805
#define CAFFE2_HAS_CUDA_MPI_ALLREDUCE 0
#endif // CAFFE2_OMPI_VERSION >= 10805
#endif // CAFFE2_OMPI_VERSION >= 20000
#else // !OPEN_MPI
// We have not really tested against other MPI environments, so let's go for a
// safe path and basically say we don't have cuda-aware functions.
#define CAFFE2_HAS_CUDA_MPI_BASICS 0
#define CAFFE2_HAS_CUDA_MPI_ALLREDUCE 0
#endif // OPEN_MPI

// We allow a macro to force using fallback functions.
#ifdef CAFFE2_FORCE_FALLBACK_CUDA_MPI
#undef CAFFE2_HAS_CUDA_MPI_BASICS
#undef CAFFE2_HAS_CUDA_MPI_ALLREDUCE
#define CAFFE2_HAS_CUDA_MPI_BASICS 0
#define CAFFE2_HAS_CUDA_MPI_ALLREDUCE 0
#endif // CAFFE2_FORCE_FALLBACK_CUDA_MPI

// Common-world creation is registered unconditionally: it does not move
// tensor data over MPI, so CUDA-awareness does not matter for it.
REGISTER_CUDA_OPERATOR(
    MPICreateCommonWorld,
    MPICreateCommonWorldOp<CUDAContext>);
#if CAFFE2_HAS_CUDA_MPI_BASICS
REGISTER_CUDA_OPERATOR(MPIBroadcast, MPIBroadcastOp<CUDAContext>);
REGISTER_CUDA_OPERATOR(MPIReduce, MPIReduceOp<float, CUDAContext>);
REGISTER_CUDA_OPERATOR(MPIAllgather, MPIAllgatherOp<float, CUDAContext>);
REGISTER_CUDA_OPERATOR(MPISendTensor, MPISendTensorOp<CUDAContext>);
REGISTER_CUDA_OPERATOR(MPIReceiveTensor, MPIReceiveTensorOp<CUDAContext>);
#else
// No CUDA-aware MPI available: route these ops through the generic GPU
// fallback wrapper (see operator_fallback_gpu.h), which runs the CPU
// implementation instead of a native CUDA one.
REGISTER_CUDA_OPERATOR(MPIBroadcast, GPUFallbackOp);
REGISTER_CUDA_OPERATOR(
    MPIReduce,
    GPUFallbackOp);
REGISTER_CUDA_OPERATOR(
    MPIAllgather,
    GPUFallbackOp);
REGISTER_CUDA_OPERATOR(
    MPISendTensor,
    GPUFallbackOp);
// NOTE(review): SkipIndices<1, 2> presumably marks blobs 1 and 2 (likely the
// src-rank/tag metadata, which lives on the CPU) as exempt from the
// fallback's device copies — confirm against operator_fallback_gpu.h.
REGISTER_CUDA_OPERATOR(
    MPIReceiveTensor,
    GPUFallbackOpEx<SkipIndices<1, 2>>);
#endif

#if CAFFE2_HAS_CUDA_MPI_ALLREDUCE
REGISTER_CUDA_OPERATOR(MPIAllreduce, MPIAllreduceOp<float, CUDAContext>);
#else
REGISTER_CUDA_OPERATOR(
    MPIAllreduce,
    GPUFallbackOp);
#endif

} // namespace caffe2
|