mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Enable TensorPipe's SHM transport (#50760)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/50760

The SHM transport uses shared-memory-backed ringbuffers to transfer small payloads between processes on the same machine. It was disabled in v1.6 due to a CMake mishap, but we've since realized that it also doesn't work that well in Docker and other setups. Enabling it here to see whether CircleCI fails.

ghstack-source-id: 120470890

Test Plan: Exported three times to CircleCI with tests consistently passing

Reviewed By: mrshenli

Differential Revision: D23814828

fbshipit-source-id: f355cb6515776debad536924de4f4d3fbb05a874
This commit is contained in:
parent
d3ec204ef2
commit
b77f72b5a0
|
|
@ -574,7 +574,7 @@ jobs:
|
|||
hostname
|
||||
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=8g --ipc=host --device /dev/kfd --device /dev/dri --group-add video -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
|
||||
else
|
||||
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
|
||||
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=1g --ipc=host -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
|
||||
fi
|
||||
echo "id=${id}" >> "${BASH_ENV}"
|
||||
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ jobs:
|
|||
hostname
|
||||
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=8g --ipc=host --device /dev/kfd --device /dev/dri --group-add video -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
|
||||
else
|
||||
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
|
||||
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=1g --ipc=host -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
|
||||
fi
|
||||
echo "id=${id}" >> "${BASH_ENV}"
|
||||
|
||||
|
|
|
|||
|
|
@ -1347,7 +1347,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
|
|||
set(TP_ENABLE_CUDA_IPC ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
set(TP_BUILD_LIBUV ON CACHE BOOL "" FORCE)
|
||||
set(TP_ENABLE_SHM OFF CACHE BOOL "" FORCE)
|
||||
set(TP_STATIC_OR_SHARED STATIC CACHE STRING "" FORCE)
|
||||
|
||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
|
||||
|
|
@ -1851,4 +1850,3 @@ if(USE_KINETO)
|
|||
set(USE_KINETO OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,12 @@
|
|||
#include <ATen/cuda/CUDAMultiStreamGuard.h>
|
||||
#endif
|
||||
|
||||
#if TENSORPIPE_HAS_SHM_TRANSPORT
|
||||
// Needed for ::getpid(), which is used to create a unique address.
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
namespace torch {
|
||||
namespace distributed {
|
||||
namespace rpc {
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user