From 08ce5eedf526181bccc64a3b9a3bb869bc06c38d Mon Sep 17 00:00:00 2001
From: Luca Wehrstedt <lcw@fb.com>
Date: Fri, 18 Jun 2021 05:12:27 -0700
Subject: [PATCH] [reland] Move RPC agents to libtorch (#60170)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/60170

Reland of #59939.

Test Plan: CI

Reviewed By: mrshenli

Differential Revision: D29193234

fbshipit-source-id: ee2a90d6be961c10f91361512bdd4cadca43dd60
---
 BUILD.bazel                                   |   2 +-
 caffe2/CMakeLists.txt                         |  57 +--------
 cmake/Dependencies.cmake                      |   7 ++
 test/cpp/rpc/CMakeLists.txt                   |   4 +-
 third_party/tensorpipe                        |   2 +-
 third_party/tensorpipe.BUILD                  | 112 ++++++++++--------
 tools/build_variables.bzl                     |  12 +-
 torch/CMakeLists.txt                          |   6 -
 torch/csrc/distributed/rpc/macros.h           |   5 -
 .../csrc/distributed/rpc/tensorpipe_agent.cpp |   1 -
 torch/csrc/distributed/rpc/tensorpipe_agent.h |   1 -
 .../csrc/distributed/rpc/tensorpipe_cuda.cpp  |   3 +-
 .../csrc/distributed/rpc/tensorpipe_utils.cpp |   1 -
 torch/csrc/distributed/rpc/tensorpipe_utils.h |   1 -
 14 files changed, 86 insertions(+), 128 deletions(-)
 delete mode 100644 torch/csrc/distributed/rpc/macros.h

diff --git a/BUILD.bazel b/BUILD.bazel
index b7e16ac1c91..45d71a8d462 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1728,7 +1728,7 @@ cc_library(
         ],
         [
             ":aten",
-            "@tensorpipe",
+            "@tensorpipe//:tensorpipe_cpu",
         ],
     ),
     alwayslink = True,
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 174018456ef..88cffd1a75d 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -344,53 +344,6 @@ endif()
 
 
 if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
-  if(USE_DISTRIBUTED)
-
-    # Define this target even if we're building without TensorPipe, to make life
-    # easier to other targets that depend on this. However, in that case, by not
-    # setting the USE_TENSORPIPE compile definition, this target will just end
-    # up being empty. Downstream targets should also add a #ifdef guard.
-    if(NOT WIN32)
-      add_library(process_group_agent
-        "${TORCH_SRC_DIR}/csrc/distributed/rpc/agent_utils.cpp"
-        "${TORCH_SRC_DIR}/csrc/distributed/rpc/agent_utils.h"
-        "${TORCH_SRC_DIR}/csrc/distributed/rpc/process_group_agent.cpp"
-        "${TORCH_SRC_DIR}/csrc/distributed/rpc/process_group_agent.h"
-      )
-      target_link_libraries(process_group_agent PRIVATE torch fmt::fmt-header-only)
-      add_dependencies(process_group_agent torch)
-
-      if(USE_TENSORPIPE)
-        add_library(tensorpipe_agent
-          "${TORCH_SRC_DIR}/csrc/distributed/rpc/agent_utils.cpp"
-          "${TORCH_SRC_DIR}/csrc/distributed/rpc/agent_utils.h"
-          "${TORCH_SRC_DIR}/csrc/distributed/rpc/macros.h"
-          "${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_agent.cpp"
-          "${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_agent.h"
-          "${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_cuda.cpp"
-          "${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_utils.cpp"
-          "${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_utils.h"
-          )
-        target_link_libraries(tensorpipe_agent PRIVATE torch tensorpipe fmt::fmt-header-only)
-        add_dependencies(tensorpipe_agent torch)
-        if(USE_CUDA)
-          target_compile_definitions(tensorpipe_agent PUBLIC USE_CUDA)
-        endif()
-
-        if(USE_ROCM)
-          target_compile_definitions(tensorpipe_agent PRIVATE
-            USE_ROCM
-            __HIP_PLATFORM_HCC__
-          )
-        endif()
-
-        target_compile_definitions(tensorpipe_agent PUBLIC USE_TENSORPIPE)
-        target_link_libraries(tensorpipe_agent PRIVATE tensorpipe)
-        add_dependencies(tensorpipe_agent tensorpipe)
-      endif()
-    endif()
-  endif()
-
   set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
 
   # Generate files
@@ -1236,7 +1189,7 @@ endif()
 if(USE_DISTRIBUTED)
   # Needed to support the inclusion of c10d/Foo.hpp headers.
   target_include_directories(torch_cpu PUBLIC ${TORCH_SRC_DIR}/lib)
-  target_compile_definitions(torch_cpu PRIVATE USE_DISTRIBUTED)
+  target_compile_definitions(torch_cpu PUBLIC USE_DISTRIBUTED)
   if(USE_GLOO AND USE_C10D_GLOO)
     target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO)
   endif()
@@ -1263,16 +1216,12 @@ if(USE_DISTRIBUTED)
   # #if defined(USE_DISTRIBUTED) && !defined(_WIN32)
   # need to be removed when RPC is supported
   if(NOT WIN32)
-    target_compile_definitions(torch_cpu PRIVATE
-      USE_RPC
-    )
+    target_compile_definitions(torch_cpu PUBLIC USE_RPC)
   endif()
   # Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp
   # can only be compiled with USE_TENSORPIPE is set.
   if(USE_TENSORPIPE)
-    target_compile_definitions(torch_cpu PRIVATE
-      USE_TENSORPIPE
-    )
+    target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE)
   endif()
 endif()
 
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index ab4cd32c40b..70b6d71face 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1377,6 +1377,13 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
     add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
 
     list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
+    if(USE_CUDA)
+      list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS tensorpipe_cuda)
+    elseif(USE_ROCM)
+      message(WARNING "TensorPipe doesn't yet support ROCm")
+      # Not yet...
+      # list(APPEND Caffe2_HIP_DEPENDENCY_LIBS tensorpipe_hip)
+    endif()
   endif()
 endif()
 
diff --git a/test/cpp/rpc/CMakeLists.txt b/test/cpp/rpc/CMakeLists.txt
index 0eff382d2b1..c9fb1b0e7f1 100644
--- a/test/cpp/rpc/CMakeLists.txt
+++ b/test/cpp/rpc/CMakeLists.txt
@@ -5,7 +5,7 @@ set(TORCH_RPC_TEST_SOURCES
   ${TORCH_RPC_TEST_DIR}/test_wire_serialization.cpp
 )
 set(TORCH_RPC_TEST_DEPENDENCY_LIBS
-  torch gtest process_group_agent
+  torch gtest
 )
 
 if(USE_GLOO)
@@ -20,7 +20,7 @@ if(USE_TENSORPIPE)
     ${TORCH_RPC_TEST_DIR}/test_tensorpipe_serialization.cpp
   )
   list(APPEND TORCH_RPC_TEST_DEPENDENCY_LIBS
-    tensorpipe_agent tensorpipe
+    tensorpipe
   )
 endif()
 
diff --git a/third_party/tensorpipe b/third_party/tensorpipe
index 42a67277c18..c0e7623adb0 160000
--- a/third_party/tensorpipe
+++ b/third_party/tensorpipe
@@ -1 +1 @@
-Subproject commit 42a67277c1882c90cec0da6e57afb20247424994
+Subproject commit c0e7623adb05f36311c7cde6dac8fc4c290419d9
diff --git a/third_party/tensorpipe.BUILD b/third_party/tensorpipe.BUILD
index d9e4bdb3957..ae210f47393 100644
--- a/third_party/tensorpipe.BUILD
+++ b/third_party/tensorpipe.BUILD
@@ -71,63 +71,82 @@ cc_library(
 )
 
 header_template_rule(
-    name = "tensorpipe_config_header",
+    name = "tensorpipe_cpu_config_header",
     src = "tensorpipe/config.h.in",
     out = "tensorpipe/config.h",
     substitutions = {
-        "#cmakedefine01 TENSORPIPE_HAS_SHM_TRANSPORT": "",
-        "#cmakedefine01 TENSORPIPE_HAS_CMA_CHANNEL": "",
-        "#cmakedefine01 TENSORPIPE_HAS_CUDA_IPC_CHANNEL": "",
-        "#cmakedefine01 TENSORPIPE_HAS_CUDA_GDR_CHANNEL": "",
-        "#cmakedefine01 TENSORPIPE_HAS_IBV_TRANSPORT": "",
-        "#cmakedefine01 TENSORPIPE_SUPPORTS_CUDA": "",
+        "#cmakedefine01 TENSORPIPE_HAS_SHM_TRANSPORT": "#define TENSORPIPE_HAS_SHM_TRANSPORT 1",
+        "#cmakedefine01 TENSORPIPE_HAS_IBV_TRANSPORT": "#define TENSORPIPE_HAS_IBV_TRANSPORT 1",
+        "#cmakedefine01 TENSORPIPE_HAS_CMA_CHANNEL": "#define TENSORPIPE_HAS_CMA_CHANNEL 1",
     },
 )
 
-TENSORPIPE_HEADERS = glob([
-    "tensorpipe/*.h",
-    "tensorpipe/channel/*.h",
-    "tensorpipe/channel/*/*.h",
-    "tensorpipe/common/*.h",
-    "tensorpipe/core/*.h",
-    "tensorpipe/transport/*.h",
-    "tensorpipe/transport/*/*.h",
-    "tensorpipe/util/*/*.h",
-])
+header_template_rule(
+    name = "tensorpipe_cuda_config_header",
+    src = "tensorpipe/config_cuda.h.in",
+    out = "tensorpipe/config_cuda.h",
+    substitutions = {
+        "#cmakedefine01 TENSORPIPE_HAS_CUDA_IPC_CHANNEL": "#define TENSORPIPE_HAS_CUDA_IPC_CHANNEL 1",
+        "#cmakedefine01 TENSORPIPE_HAS_CUDA_GDR_CHANNEL": "#define TENSORPIPE_HAS_CUDA_GDR_CHANNEL 1",
+    },
+)
 
-TENSORPIPE_BASE_SRCS = glob([
-    "tensorpipe/*.cc",
-    "tensorpipe/channel/*.cc",
-    "tensorpipe/common/address.cc",
-    "tensorpipe/common/epoll_loop.cc",
-    "tensorpipe/common/error.cc",
-    "tensorpipe/common/fd.cc",
-    "tensorpipe/common/ibv.cc",
-    "tensorpipe/common/socket.cc",
-    "tensorpipe/common/system.cc",
-    "tensorpipe/core/*.cc",
-    "tensorpipe/transport/*.cc",
-    "tensorpipe/util/*/*.cc",
-])
+# We explicitly list the CUDA headers & sources, and we consider everything else
+# as CPU (using a catch-all glob). This is both because there's fewer CUDA files
+# (thus making it easier to list them exhaustively) and because it will make it
+# more likely to catch a misclassified file: if we forget to mark a file as CUDA
+# we'll try to build it on CPU and that's likely to fail.
 
-TENSORPIPE_SRCS = TENSORPIPE_BASE_SRCS + glob([
-    "tensorpipe/channel/basic/*.cc",
-    "tensorpipe/channel/mpt/*.cc",
-    "tensorpipe/channel/xth/*.cc",
-    "tensorpipe/transport/uv/*.cc",
-])
+TENSORPIPE_CUDA_HEADERS = [
+    "tensorpipe/tensorpipe_cuda.h",
+    "tensorpipe/channel/cuda_basic/*.h",
+    "tensorpipe/channel/cuda_gdr/*.h",
+    "tensorpipe/channel/cuda_ipc/*.h",
+    "tensorpipe/channel/cuda_xth/*.h",
+    "tensorpipe/common/cuda.h",
+    "tensorpipe/common/cuda_buffer.h",
+    "tensorpipe/common/cuda_lib.h",
+    "tensorpipe/common/cuda_loop.h",
+    "tensorpipe/common/nvml_lib.h",
+]
 
-TENSORPIPE_SRCS_CUDA = TENSORPIPE_SRCS + glob([
-    "tensorpipe/common/cuda_loop.cc",
+TENSORPIPE_CUDA_SOURCES = [
     "tensorpipe/channel/cuda_basic/*.cc",
+    "tensorpipe/channel/cuda_gdr/*.cc",
     "tensorpipe/channel/cuda_ipc/*.cc",
     "tensorpipe/channel/cuda_xth/*.cc",
-])
+    "tensorpipe/common/cuda_buffer.cc",
+    "tensorpipe/common/cuda_loop.cc",
+]
+
+TENSORPIPE_CPU_HEADERS = glob(
+    [
+        "tensorpipe/*.h",
+        "tensorpipe/channel/*.h",
+        "tensorpipe/channel/*/*.h",
+        "tensorpipe/common/*.h",
+        "tensorpipe/core/*.h",
+        "tensorpipe/transport/*.h",
+        "tensorpipe/transport/*/*.h",
+    ],
+    exclude=TENSORPIPE_CUDA_HEADERS)
+
+TENSORPIPE_CPU_SOURCES = glob(
+    [
+        "tensorpipe/*.cc",
+        "tensorpipe/channel/*.cc",
+        "tensorpipe/channel/*/*.cc",
+        "tensorpipe/common/*.cc",
+        "tensorpipe/core/*.cc",
+        "tensorpipe/transport/*.cc",
+        "tensorpipe/transport/*/*.cc",
+    ],
+    exclude=TENSORPIPE_CUDA_SOURCES)
 
 cc_library(
-    name = "tensorpipe",
-    srcs = TENSORPIPE_SRCS + [":tensorpipe_config_header"],
-    hdrs = TENSORPIPE_HEADERS,
+    name = "tensorpipe_cpu",
+    srcs = TENSORPIPE_CPU_SOURCES,
+    hdrs = TENSORPIPE_CPU_HEADERS + [":tensorpipe_cpu_config_header"],
     includes = [
         ".",
     ],
@@ -143,8 +162,8 @@ cc_library(
 
 cc_library(
     name = "tensorpipe_cuda",
-    srcs = TENSORPIPE_SRCS_CUDA + [":tensorpipe_config_header"],
-    hdrs = TENSORPIPE_HEADERS,
+    srcs = TENSORPIPE_CUDA_SOURCES,
+    hdrs = TENSORPIPE_CUDA_HEADERS + [":tensorpipe_cuda_config_header"],
     includes = [
         ".",
     ],
@@ -153,8 +172,7 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
-        ":libnop",
-        ":libuv",
+        ":tensorpipe_cpu",
         "@cuda",
     ],
 )
diff --git a/tools/build_variables.bzl b/tools/build_variables.bzl
index c0b206fa48e..06acafd645e 100644
--- a/tools/build_variables.bzl
+++ b/tools/build_variables.bzl
@@ -356,12 +356,14 @@ libtorch_distributed_extra_sources = [
     "torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_resp.cpp",
     "torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.cpp",
     "torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.cpp",
+    "torch/csrc/distributed/rpc/agent_utils.cpp",
     "torch/csrc/distributed/rpc/message.cpp",
     "torch/csrc/distributed/rpc/profiler/remote_profiler_manager.cpp",
     "torch/csrc/distributed/rpc/profiler/server_process_global_profiler.cpp",
     "torch/csrc/distributed/rpc/python_call.cpp",
     "torch/csrc/distributed/rpc/python_remote_call.cpp",
     "torch/csrc/distributed/rpc/python_resp.cpp",
+    "torch/csrc/distributed/rpc/process_group_agent.cpp",
     "torch/csrc/distributed/rpc/request_callback.cpp",
     "torch/csrc/distributed/rpc/request_callback_no_python.cpp",
     "torch/csrc/distributed/rpc/rpc_agent.cpp",
@@ -371,6 +373,9 @@ libtorch_distributed_extra_sources = [
     "torch/csrc/distributed/rpc/script_call.cpp",
     "torch/csrc/distributed/rpc/script_remote_call.cpp",
     "torch/csrc/distributed/rpc/script_resp.cpp",
+    "torch/csrc/distributed/rpc/tensorpipe_agent.cpp",
+    "torch/csrc/distributed/rpc/tensorpipe_utils.cpp",
+    "torch/csrc/distributed/rpc/testing/faulty_process_group_agent.cpp",
     "torch/csrc/distributed/rpc/torchscript_functions.cpp",
     "torch/csrc/distributed/rpc/types.cpp",
     "torch/csrc/distributed/rpc/utils.cpp",
@@ -526,6 +531,7 @@ libtorch_cuda_distributed_base_sources = [
 
 # These files are only supported on Linux (and others) but not on Windows.
 libtorch_cuda_distributed_extra_sources = [
+    "torch/csrc/distributed/rpc/tensorpipe_cuda.cpp",
     "torch/lib/c10d/NCCLUtils.cpp",
     "torch/lib/c10d/ProcessGroupNCCL.cpp",
 ]
@@ -714,17 +720,11 @@ libtorch_python_distributed_core_sources = [
 
 libtorch_python_distributed_sources = libtorch_python_distributed_core_sources + [
     "torch/csrc/distributed/autograd/init.cpp",
-    "torch/csrc/distributed/rpc/agent_utils.cpp",
     "torch/csrc/distributed/rpc/init.cpp",
-    "torch/csrc/distributed/rpc/process_group_agent.cpp",
     "torch/csrc/distributed/rpc/py_rref.cpp",
     "torch/csrc/distributed/rpc/python_functions.cpp",
     "torch/csrc/distributed/rpc/python_rpc_handler.cpp",
     "torch/csrc/distributed/rpc/request_callback_impl.cpp",
-    "torch/csrc/distributed/rpc/tensorpipe_agent.cpp",
-    "torch/csrc/distributed/rpc/tensorpipe_cuda.cpp",
-    "torch/csrc/distributed/rpc/tensorpipe_utils.cpp",
-    "torch/csrc/distributed/rpc/testing/faulty_process_group_agent.cpp",
     "torch/csrc/distributed/rpc/testing/init.cpp",
     "torch/csrc/distributed/rpc/unpickled_python_call.cpp",
     "torch/csrc/distributed/rpc/unpickled_python_remote_call.cpp",
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 197926f3098..ce0f16bf5ab 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -261,11 +261,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
 endif()
 
 if(USE_DISTRIBUTED)
-    list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_DISTRIBUTED)
     if(WIN32)
       append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS)
     else()
-      list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_RPC)
       append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS)
     endif()
     # Disable certain warnings for GCC-9.X
@@ -274,10 +272,6 @@ if(USE_DISTRIBUTED)
       set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
       set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
     endif()
-    if(USE_TENSORPIPE)
-      list(APPEND TORCH_PYTHON_LINK_LIBRARIES tensorpipe)
-      list(APPEND TORCH_PYTHON_PUBLIC_COMPILE_DEFINITIONS USE_TENSORPIPE)
-    endif()
     # NCCL is a private dependency of libtorch, but libtorch_python includes
     # some private headers of libtorch, which in turn include NCCL. As a hacky
     # alternative to making NCCL a public dependency of libtorch, we make it
diff --git a/torch/csrc/distributed/rpc/macros.h b/torch/csrc/distributed/rpc/macros.h
deleted file mode 100644
index 2763dd0207b..00000000000
--- a/torch/csrc/distributed/rpc/macros.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#pragma once
-
-#if defined(USE_CUDA) && !defined(__HIP_PLATFORM_HCC__)
-#define USE_CUDA_NOT_ROCM
-#endif
diff --git a/torch/csrc/distributed/rpc/tensorpipe_agent.cpp b/torch/csrc/distributed/rpc/tensorpipe_agent.cpp
index 0f6645cdcd5..74c27942565 100644
--- a/torch/csrc/distributed/rpc/tensorpipe_agent.cpp
+++ b/torch/csrc/distributed/rpc/tensorpipe_agent.cpp
@@ -10,7 +10,6 @@
 #include <tensorpipe/tensorpipe.h>
 
 #include <torch/csrc/distributed/rpc/agent_utils.h>
-#include <torch/csrc/distributed/rpc/macros.h>
 #include <torch/csrc/distributed/rpc/tensorpipe_utils.h>
 #include <torch/csrc/distributed/rpc/utils.h>
 
diff --git a/torch/csrc/distributed/rpc/tensorpipe_agent.h b/torch/csrc/distributed/rpc/tensorpipe_agent.h
index 9462c396b0f..4450792a0f0 100644
--- a/torch/csrc/distributed/rpc/tensorpipe_agent.h
+++ b/torch/csrc/distributed/rpc/tensorpipe_agent.h
@@ -9,7 +9,6 @@
 #include <c10d/PrefixStore.hpp>
 #include <c10d/ProcessGroup.hpp>
 #include <c10d/Store.hpp>
-#include <torch/csrc/distributed/rpc/macros.h>
 #include <torch/csrc/distributed/rpc/rpc_agent.h>
 
 // Forward-declare the TensorPipe classes we need, to avoid including its
diff --git a/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp b/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp
index 9489fcd222b..03ec63d8ddc 100644
--- a/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp
+++ b/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp
@@ -1,8 +1,7 @@
-#include <torch/csrc/distributed/rpc/macros.h>
 #include <torch/csrc/distributed/rpc/tensorpipe_agent.h>
 #include <torch/csrc/distributed/rpc/tensorpipe_utils.h>
 
-#if defined(USE_TENSORPIPE) && defined(USE_CUDA_NOT_ROCM)
+#if defined(USE_TENSORPIPE) && !defined(__HIP_PLATFORM_HCC__)
 
 #include <c10/cuda/CUDACachingAllocator.h>
 #include <c10/cuda/CUDAGuard.h>
diff --git a/torch/csrc/distributed/rpc/tensorpipe_utils.cpp b/torch/csrc/distributed/rpc/tensorpipe_utils.cpp
index 55b8554f66d..32f3a132f8f 100644
--- a/torch/csrc/distributed/rpc/tensorpipe_utils.cpp
+++ b/torch/csrc/distributed/rpc/tensorpipe_utils.cpp
@@ -1,4 +1,3 @@
-#include <torch/csrc/distributed/rpc/macros.h>
 #include <torch/csrc/distributed/rpc/tensorpipe_utils.h>
 
 #ifdef USE_TENSORPIPE
diff --git a/torch/csrc/distributed/rpc/tensorpipe_utils.h b/torch/csrc/distributed/rpc/tensorpipe_utils.h
index ab328b9dca1..bf5d87cacc4 100644
--- a/torch/csrc/distributed/rpc/tensorpipe_utils.h
+++ b/torch/csrc/distributed/rpc/tensorpipe_utils.h
@@ -2,7 +2,6 @@
 
 #ifdef USE_TENSORPIPE
 
-#include <torch/csrc/distributed/rpc/macros.h>
 #include <torch/csrc/distributed/rpc/utils.h>
 
 namespace tensorpipe {