From 5bbfb96203370f73b4cd28e6ac766a26debce3df Mon Sep 17 00:00:00 2001 From: cyy Date: Mon, 14 Aug 2023 16:55:25 +0000 Subject: [PATCH] [Reland] Upgrade NVTX to NVTX3 (#97582) PR #90689 replaces NVTX with NVTX3. However, the torch::nvtoolsext is created only when the third party NVTX is used. This is clearly a logical error. We now move the creation code out of the branch to cover all cases. This should fix the issues reported in the comments of #90689. It would be better to move configurations of the failed FRL jobs to CI tests so that we can find such issues early before merging. Pull Request resolved: https://github.com/pytorch/pytorch/pull/97582 Approved by: https://github.com/peterbell10 --- .gitmodules | 3 +++ caffe2/CMakeLists.txt | 5 +++-- cmake/TorchConfig.cmake.in | 14 +------------- cmake/public/cuda.cmake | 11 ++++------- setup.py | 3 --- third_party/NVTX | 1 + third_party/nvfuser/csrc/instrumentation.h | 4 ++++ torch/CMakeLists.txt | 1 - torch/csrc/cuda/shared/nvtx.cpp | 4 ++++ torch/csrc/profiler/stubs/cuda.cpp | 4 ++++ torch/utils/hipify/cuda_to_hip_mappings.py | 1 + 11 files changed, 25 insertions(+), 26 deletions(-) create mode 160000 third_party/NVTX diff --git a/.gitmodules b/.gitmodules index bea79b8993b..596603d8827 100644 --- a/.gitmodules +++ b/.gitmodules @@ -157,3 +157,6 @@ [submodule "third_party/mimalloc"] path = third_party/mimalloc url = https://github.com/microsoft/mimalloc.git +[submodule "third_party/NVTX"] + path = third_party/NVTX + url = https://github.com/NVIDIA/NVTX.git diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 74d0d557190..4acb29fbf62 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1516,7 +1516,8 @@ if(USE_CUDA) target_link_libraries(torch_cpu PRIVATE torch::cudart) endif() target_link_libraries(torch_cuda INTERFACE torch::cudart) - target_link_libraries(torch_cuda PUBLIC c10_cuda torch::nvtoolsext) + target_link_libraries(torch_cuda PUBLIC c10_cuda) + target_link_libraries(torch_cuda 
PRIVATE torch::nvtoolsext) target_include_directories( torch_cuda INTERFACE $) @@ -1571,7 +1572,7 @@ if(BUILD_SHARED_LIBS) # not find them, because they're usually in non-standard locations) if(USE_CUDA) target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}) - target_link_libraries(torch_global_deps torch::cudart torch::nvtoolsext) + target_link_libraries(torch_global_deps torch::cudart) endif() if(USE_TBB) target_link_libraries(torch_global_deps TBB::tbb) diff --git a/cmake/TorchConfig.cmake.in b/cmake/TorchConfig.cmake.in index 6d518a14896..37344dcc62e 100644 --- a/cmake/TorchConfig.cmake.in +++ b/cmake/TorchConfig.cmake.in @@ -129,30 +129,18 @@ endif() if(@USE_CUDA@) if(MSVC) - if(NOT NVTOOLEXT_HOME) - set(NVTOOLEXT_HOME "C:/Program Files/NVIDIA Corporation/NvToolsExt") - endif() - if(DEFINED ENV{NVTOOLSEXT_PATH}) - set(NVTOOLEXT_HOME $ENV{NVTOOLSEXT_PATH}) - endif() - set(TORCH_CUDA_LIBRARIES - ${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib - ${CUDA_LIBRARIES}) - list(APPEND TORCH_INCLUDE_DIRS ${NVTOOLEXT_HOME}/include) + set(TORCH_CUDA_LIBRARIES ${CUDA_LIBRARIES}) find_library(CAFFE2_NVRTC_LIBRARY caffe2_nvrtc PATHS "${TORCH_INSTALL_PREFIX}/lib") list(APPEND TORCH_CUDA_LIBRARIES ${CAFFE2_NVRTC_LIBRARY}) elseif(APPLE) set(TORCH_CUDA_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib/libcudart.dylib ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvrtc.dylib - ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvToolsExt.dylib ${CUDA_LIBRARIES}) else() - find_library(LIBNVTOOLSEXT libnvToolsExt.so PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/) set(TORCH_CUDA_LIBRARIES ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB} - ${LIBNVTOOLSEXT} ${CUDA_LIBRARIES}) endif() if(@BUILD_SHARED_LIBS@) diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake index 32f3ba375b5..85ce7b319da 100644 --- a/cmake/public/cuda.cmake +++ b/cmake/public/cuda.cmake @@ -67,10 +67,6 @@ if(NOT CMAKE_CUDA_COMPILER_VERSION STREQUAL CUDAToolkit_VERSION OR "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIR}'") endif() -if(NOT TARGET 
CUDA::nvToolsExt) - message(FATAL_ERROR "Failed to find nvToolsExt") -endif() - message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION}) message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE}) message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR}) @@ -216,9 +212,10 @@ endif() # nvToolsExt add_library(torch::nvtoolsext INTERFACE IMPORTED) -set_property( - TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES - CUDA::nvToolsExt) +find_path(nvtx3_dir NAMES nvtx3 PATHS "${CUDA_INCLUDE_DIRS}" "${CMAKE_CURRENT_LIST_DIR}/../../third_party/NVTX/c/include" NO_DEFAULT_PATH) +find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir) +target_include_directories(torch::nvtoolsext INTERFACE "${nvtx3_dir}") + # cublas add_library(caffe2::cublas INTERFACE IMPORTED) diff --git a/setup.py b/setup.py index d6baa4bdc81..bcf21b1fbf2 100644 --- a/setup.py +++ b/setup.py @@ -186,9 +186,6 @@ # NVFUSER_SOURCE_DIR # specify nvfuser root directory # -# NVTOOLSEXT_PATH (Windows only) -# specify where nvtoolsext is installed -# # ACL_ROOT_DIR # specify where Compute Library is installed # diff --git a/third_party/NVTX b/third_party/NVTX new file mode 160000 index 00000000000..e170594ac7c --- /dev/null +++ b/third_party/NVTX @@ -0,0 +1 @@ +Subproject commit e170594ac7cf1dac584da473d4ca9301087090c1 diff --git a/third_party/nvfuser/csrc/instrumentation.h b/third_party/nvfuser/csrc/instrumentation.h index cd57825a248..5b27f0d98b9 100644 --- a/third_party/nvfuser/csrc/instrumentation.h +++ b/third_party/nvfuser/csrc/instrumentation.h @@ -2,7 +2,11 @@ #include +#ifndef FBCODE_CAFFE2 +#include +#else #include +#endif // NOLINTNEXTLINE(modernize-deprecated-headers) #include diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index 62ee4c12a94..32660cf3d35 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -136,7 +136,6 @@ if(USE_CUDA) list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cudnn) list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN) 
endif() - list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext) endif() diff --git a/torch/csrc/cuda/shared/nvtx.cpp b/torch/csrc/cuda/shared/nvtx.cpp index 4fb72c5f79b..06f054eb585 100644 --- a/torch/csrc/cuda/shared/nvtx.cpp +++ b/torch/csrc/cuda/shared/nvtx.cpp @@ -1,7 +1,11 @@ #ifdef _WIN32 #include // _wgetenv for nvtx #endif +#ifndef FBCODE_CAFFE2 +#include +#else #include +#endif #include namespace torch::cuda::shared { diff --git a/torch/csrc/profiler/stubs/cuda.cpp b/torch/csrc/profiler/stubs/cuda.cpp index dec87576f36..283f8ec8abb 100644 --- a/torch/csrc/profiler/stubs/cuda.cpp +++ b/torch/csrc/profiler/stubs/cuda.cpp @@ -1,6 +1,10 @@ #include +#ifndef FBCODE_CAFFE2 +#include +#else #include +#endif #include #include diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py index c0ac38dc7c0..a1b182f3946 100644 --- a/torch/utils/hipify/cuda_to_hip_mappings.py +++ b/torch/utils/hipify/cuda_to_hip_mappings.py @@ -645,6 +645,7 @@ CUDA_INCLUDE_MAP = collections.OrderedDict( ("cub/device/device_scan.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), ("cub/device/device_select.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), ("nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)), + ("nvtx3/nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)), ("nvml.h", ("rocm_smi/rocm_smi.h", CONV_INCLUDE, API_ROCMSMI)), ] )