mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[Reland] Upgrade NVTX to NVTX3 (#97582)
PR #90689 replaces NVTX with NVTX3. However, the torch::nvtoolsext is created only when the third party NVTX is used. This is clearly a logical error. We now move the creation code out of the branch to cover all cases. This should fix the issues reported in the comments of #90689. It would be better to move configurations of the failed FRL jobs to CI tests so that we can find such issues early before merging. Pull Request resolved: https://github.com/pytorch/pytorch/pull/97582 Approved by: https://github.com/peterbell10
This commit is contained in:
parent
461c703ee6
commit
5bbfb96203
3
.gitmodules
vendored
3
.gitmodules
vendored
|
|
@ -157,3 +157,6 @@
|
|||
[submodule "third_party/mimalloc"]
|
||||
path = third_party/mimalloc
|
||||
url = https://github.com/microsoft/mimalloc.git
|
||||
[submodule "third_party/NVTX"]
|
||||
path = third_party/NVTX
|
||||
url = https://github.com/NVIDIA/NVTX.git
|
||||
|
|
|
|||
|
|
@ -1516,7 +1516,8 @@ if(USE_CUDA)
|
|||
target_link_libraries(torch_cpu PRIVATE torch::cudart)
|
||||
endif()
|
||||
target_link_libraries(torch_cuda INTERFACE torch::cudart)
|
||||
target_link_libraries(torch_cuda PUBLIC c10_cuda torch::nvtoolsext)
|
||||
target_link_libraries(torch_cuda PUBLIC c10_cuda)
|
||||
target_link_libraries(torch_cuda PRIVATE torch::nvtoolsext)
|
||||
|
||||
target_include_directories(
|
||||
torch_cuda INTERFACE $<INSTALL_INTERFACE:include>)
|
||||
|
|
@ -1571,7 +1572,7 @@ if(BUILD_SHARED_LIBS)
|
|||
# not find them, because they're usually in non-standard locations)
|
||||
if(USE_CUDA)
|
||||
target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
|
||||
target_link_libraries(torch_global_deps torch::cudart torch::nvtoolsext)
|
||||
target_link_libraries(torch_global_deps torch::cudart)
|
||||
endif()
|
||||
if(USE_TBB)
|
||||
target_link_libraries(torch_global_deps TBB::tbb)
|
||||
|
|
|
|||
|
|
@ -129,30 +129,18 @@ endif()
|
|||
|
||||
if(@USE_CUDA@)
|
||||
if(MSVC)
|
||||
if(NOT NVTOOLEXT_HOME)
|
||||
set(NVTOOLEXT_HOME "C:/Program Files/NVIDIA Corporation/NvToolsExt")
|
||||
endif()
|
||||
if(DEFINED ENV{NVTOOLSEXT_PATH})
|
||||
set(NVTOOLEXT_HOME $ENV{NVTOOLSEXT_PATH})
|
||||
endif()
|
||||
set(TORCH_CUDA_LIBRARIES
|
||||
${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib
|
||||
${CUDA_LIBRARIES})
|
||||
list(APPEND TORCH_INCLUDE_DIRS ${NVTOOLEXT_HOME}/include)
|
||||
set(TORCH_CUDA_LIBRARIES ${CUDA_LIBRARIES})
|
||||
find_library(CAFFE2_NVRTC_LIBRARY caffe2_nvrtc PATHS "${TORCH_INSTALL_PREFIX}/lib")
|
||||
list(APPEND TORCH_CUDA_LIBRARIES ${CAFFE2_NVRTC_LIBRARY})
|
||||
elseif(APPLE)
|
||||
set(TORCH_CUDA_LIBRARIES
|
||||
${CUDA_TOOLKIT_ROOT_DIR}/lib/libcudart.dylib
|
||||
${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvrtc.dylib
|
||||
${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvToolsExt.dylib
|
||||
${CUDA_LIBRARIES})
|
||||
else()
|
||||
find_library(LIBNVTOOLSEXT libnvToolsExt.so PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/)
|
||||
set(TORCH_CUDA_LIBRARIES
|
||||
${CUDA_CUDA_LIB}
|
||||
${CUDA_NVRTC_LIB}
|
||||
${LIBNVTOOLSEXT}
|
||||
${CUDA_LIBRARIES})
|
||||
endif()
|
||||
if(@BUILD_SHARED_LIBS@)
|
||||
|
|
|
|||
|
|
@ -67,10 +67,6 @@ if(NOT CMAKE_CUDA_COMPILER_VERSION STREQUAL CUDAToolkit_VERSION OR
|
|||
"V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIR}'")
|
||||
endif()
|
||||
|
||||
if(NOT TARGET CUDA::nvToolsExt)
|
||||
message(FATAL_ERROR "Failed to find nvToolsExt")
|
||||
endif()
|
||||
|
||||
message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
|
||||
message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
|
||||
message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
|
||||
|
|
@ -216,9 +212,10 @@ endif()
|
|||
|
||||
# nvToolsExt
|
||||
add_library(torch::nvtoolsext INTERFACE IMPORTED)
|
||||
set_property(
|
||||
TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::nvToolsExt)
|
||||
find_path(nvtx3_dir NAMES nvtx3 PATHS "${CUDA_INCLUDE_DIRS}" "${CMAKE_CURRENT_LIST_DIR}/../../third_party/NVTX/c/include" NO_DEFAULT_PATH)
|
||||
find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir)
|
||||
target_include_directories(torch::nvtoolsext INTERFACE "${nvtx3_dir}")
|
||||
|
||||
|
||||
# cublas
|
||||
add_library(caffe2::cublas INTERFACE IMPORTED)
|
||||
|
|
|
|||
3
setup.py
3
setup.py
|
|
@ -186,9 +186,6 @@
|
|||
# NVFUSER_SOURCE_DIR
|
||||
# specify nvfuser root directory
|
||||
#
|
||||
# NVTOOLSEXT_PATH (Windows only)
|
||||
# specify where nvtoolsext is installed
|
||||
#
|
||||
# ACL_ROOT_DIR
|
||||
# specify where Compute Library is installed
|
||||
#
|
||||
|
|
|
|||
1
third_party/NVTX
vendored
Submodule
1
third_party/NVTX
vendored
Submodule
|
|
@ -0,0 +1 @@
|
|||
Subproject commit e170594ac7cf1dac584da473d4ca9301087090c1
|
||||
4
third_party/nvfuser/csrc/instrumentation.h
vendored
4
third_party/nvfuser/csrc/instrumentation.h
vendored
|
|
@ -2,7 +2,11 @@
|
|||
|
||||
#include <utils.h>
|
||||
|
||||
#ifndef FBCODE_CAFFE2
|
||||
#include <nvtx3/nvToolsExt.h>
|
||||
#else
|
||||
#include <nvToolsExt.h>
|
||||
#endif
|
||||
|
||||
// NOLINTNEXTLINE(modernize-deprecated-headers)
|
||||
#include <stdio.h>
|
||||
|
|
|
|||
|
|
@ -136,7 +136,6 @@ if(USE_CUDA)
|
|||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cudnn)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN)
|
||||
endif()
|
||||
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
#ifdef _WIN32
|
||||
#include <wchar.h> // _wgetenv for nvtx
|
||||
#endif
|
||||
#ifndef FBCODE_CAFFE2
|
||||
#include <nvtx3/nvToolsExt.h>
|
||||
#else
|
||||
#include <nvToolsExt.h>
|
||||
#endif
|
||||
#include <torch/csrc/utils/pybind.h>
|
||||
|
||||
namespace torch::cuda::shared {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
#include <sstream>
|
||||
|
||||
#ifndef FBCODE_CAFFE2
|
||||
#include <nvtx3/nvToolsExt.h>
|
||||
#else
|
||||
#include <nvToolsExt.h>
|
||||
#endif
|
||||
|
||||
#include <c10/cuda/CUDAGuard.h>
|
||||
#include <c10/util/irange.h>
|
||||
|
|
|
|||
|
|
@ -645,6 +645,7 @@ CUDA_INCLUDE_MAP = collections.OrderedDict(
|
|||
("cub/device/device_scan.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
|
||||
("cub/device/device_select.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
|
||||
("nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)),
|
||||
("nvtx3/nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)),
|
||||
("nvml.h", ("rocm_smi/rocm_smi.h", CONV_INCLUDE, API_ROCMSMI)),
|
||||
]
|
||||
)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user