Revert "[Reland2] Update NVTX to NVTX3 (#109843)"

This reverts commit dcb486232d.

Reverted https://github.com/pytorch/pytorch/pull/109843 on behalf of https://github.com/atalman because the diff broke internal builds and tests ([comment](https://github.com/pytorch/pytorch/pull/109843#issuecomment-1841105398))
This commit is contained in:
PyTorch MergeBot 2023-12-05 16:10:20 +00:00
parent e06bff8bbe
commit ee96399bb4
16 changed files with 56 additions and 25 deletions

View File

@ -66,6 +66,13 @@ set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64 set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH% set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
set CUDNN_ROOT_DIR=%CUDA_PATH% set CUDNN_ROOT_DIR=%CUDA_PATH%
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
set CUDNN_ROOT_DIR=%CUDA_PATH%
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH% set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
:cuda_build_end :cuda_build_end

View File

@ -40,6 +40,7 @@ set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64 set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH% set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
set CUDNN_ROOT_DIR=%CUDA_PATH% set CUDNN_ROOT_DIR=%CUDA_PATH%
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH% set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
set NUMBAPRO_CUDALIB=%CUDA_PATH%\bin set NUMBAPRO_CUDALIB=%CUDA_PATH%\bin
set NUMBAPRO_LIBDEVICE=%CUDA_PATH%\nvvm\libdevice set NUMBAPRO_LIBDEVICE=%CUDA_PATH%\nvvm\libdevice

View File

@ -31,6 +31,6 @@ if ERRORLEVEL 1 exit /b 1
:: Run tests C++-side and load the exported script module. :: Run tests C++-side and load the exported script module.
cd build cd build
set PATH=%TMP_DIR_WIN%\build\torch\lib;%PATH% set PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64;%TMP_DIR_WIN%\build\torch\lib;%PATH%
test_custom_backend.exe model.pt test_custom_backend.exe model.pt
if ERRORLEVEL 1 exit /b 1 if ERRORLEVEL 1 exit /b 1

View File

@ -31,6 +31,6 @@ if ERRORLEVEL 1 exit /b 1
:: Run tests C++-side and load the exported script module. :: Run tests C++-side and load the exported script module.
cd build cd build
set PATH=%TMP_DIR_WIN%\build\torch\lib;%PATH% set PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64;%TMP_DIR_WIN%\build\torch\lib;%PATH%
test_custom_ops.exe model.pt test_custom_ops.exe model.pt
if ERRORLEVEL 1 exit /b 1 if ERRORLEVEL 1 exit /b 1

3
.gitmodules vendored
View File

@ -149,6 +149,3 @@
[submodule "third_party/mimalloc"] [submodule "third_party/mimalloc"]
path = third_party/mimalloc path = third_party/mimalloc
url = https://github.com/microsoft/mimalloc.git url = https://github.com/microsoft/mimalloc.git
[submodule "third_party/NVTX"]
path = third_party/NVTX
url = https://github.com/NVIDIA/NVTX.git

View File

@ -1548,8 +1548,7 @@ if(USE_CUDA)
target_link_libraries(torch_cpu PRIVATE torch::cudart) target_link_libraries(torch_cpu PRIVATE torch::cudart)
endif() endif()
target_link_libraries(torch_cuda INTERFACE torch::cudart) target_link_libraries(torch_cuda INTERFACE torch::cudart)
target_link_libraries(torch_cuda PUBLIC c10_cuda) target_link_libraries(torch_cuda PUBLIC c10_cuda torch::nvtoolsext)
target_link_libraries(torch_cuda PRIVATE torch::nvtoolsext)
target_include_directories( target_include_directories(
torch_cuda INTERFACE $<INSTALL_INTERFACE:include>) torch_cuda INTERFACE $<INSTALL_INTERFACE:include>)
@ -1606,7 +1605,7 @@ if(BUILD_SHARED_LIBS)
# not find them, because they're usually in non-standard locations) # not find them, because they're usually in non-standard locations)
if(USE_CUDA) if(USE_CUDA)
target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}) target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
target_link_libraries(torch_global_deps torch::cudart) target_link_libraries(torch_global_deps torch::cudart torch::nvtoolsext)
endif() endif()
if(USE_TBB) if(USE_TBB)
target_link_libraries(torch_global_deps TBB::tbb) target_link_libraries(torch_global_deps TBB::tbb)

View File

@ -26,6 +26,13 @@ if sys.platform == "win32":
th_root = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'torch') th_root = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'torch')
th_dll_path = os.path.join(th_root, 'lib') th_dll_path = os.path.join(th_root, 'lib')
if not os.path.exists(os.path.join(th_dll_path, 'nvToolsExt64_1.dll')) and \
not os.path.exists(os.path.join(py_dll_path, 'nvToolsExt64_1.dll')):
nvtoolsext_dll_path = os.path.join(
os.getenv('NVTOOLSEXT_PATH', 'C:\\Program Files\\NVIDIA Corporation\\NvToolsExt'), 'bin', 'x64')
else:
nvtoolsext_dll_path = ''
import importlib.util import importlib.util
import glob import glob
spec = importlib.util.spec_from_file_location('torch_version', os.path.join(th_root, 'version.py')) spec = importlib.util.spec_from_file_location('torch_version', os.path.join(th_root, 'version.py'))
@ -43,7 +50,7 @@ if sys.platform == "win32":
import ctypes import ctypes
kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True) kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True)
dll_paths = list(filter(os.path.exists, [th_dll_path, py_dll_path, cuda_path])) dll_paths = list(filter(os.path.exists, [th_dll_path, py_dll_path, nvtoolsext_dll_path, cuda_path]))
with_load_library_flags = hasattr(kernel32, 'AddDllDirectory') with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')
prev_error_mode = kernel32.SetErrorMode(0x0001) prev_error_mode = kernel32.SetErrorMode(0x0001)

View File

@ -129,18 +129,30 @@ endif()
if(@USE_CUDA@) if(@USE_CUDA@)
if(MSVC) if(MSVC)
set(TORCH_CUDA_LIBRARIES ${CUDA_LIBRARIES}) if(NOT NVTOOLEXT_HOME)
set(NVTOOLEXT_HOME "C:/Program Files/NVIDIA Corporation/NvToolsExt")
endif()
if(DEFINED ENV{NVTOOLSEXT_PATH})
set(NVTOOLEXT_HOME $ENV{NVTOOLSEXT_PATH})
endif()
set(TORCH_CUDA_LIBRARIES
${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib
${CUDA_LIBRARIES})
list(APPEND TORCH_INCLUDE_DIRS ${NVTOOLEXT_HOME}/include)
find_library(CAFFE2_NVRTC_LIBRARY caffe2_nvrtc PATHS "${TORCH_INSTALL_PREFIX}/lib") find_library(CAFFE2_NVRTC_LIBRARY caffe2_nvrtc PATHS "${TORCH_INSTALL_PREFIX}/lib")
list(APPEND TORCH_CUDA_LIBRARIES ${CAFFE2_NVRTC_LIBRARY}) list(APPEND TORCH_CUDA_LIBRARIES ${CAFFE2_NVRTC_LIBRARY})
elseif(APPLE) elseif(APPLE)
set(TORCH_CUDA_LIBRARIES set(TORCH_CUDA_LIBRARIES
${CUDA_TOOLKIT_ROOT_DIR}/lib/libcudart.dylib ${CUDA_TOOLKIT_ROOT_DIR}/lib/libcudart.dylib
${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvrtc.dylib ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvrtc.dylib
${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvToolsExt.dylib
${CUDA_LIBRARIES}) ${CUDA_LIBRARIES})
else() else()
find_library(LIBNVTOOLSEXT libnvToolsExt.so PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/)
set(TORCH_CUDA_LIBRARIES set(TORCH_CUDA_LIBRARIES
${CUDA_CUDA_LIB} ${CUDA_CUDA_LIB}
${CUDA_NVRTC_LIB} ${CUDA_NVRTC_LIB}
${LIBNVTOOLSEXT}
${CUDA_LIBRARIES}) ${CUDA_LIBRARIES})
endif() endif()
if(@BUILD_SHARED_LIBS@) if(@BUILD_SHARED_LIBS@)

View File

@ -66,6 +66,10 @@ if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION)
"V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'") "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
endif() endif()
if(NOT TARGET CUDA::nvToolsExt)
message(FATAL_ERROR "Failed to find nvToolsExt")
endif()
message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION}) message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE}) message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR}) message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
@ -210,15 +214,10 @@ else()
endif() endif()
# nvToolsExt # nvToolsExt
find_path(nvtx3_dir NAMES nvtx3 PATHS "${CUDA_INCLUDE_DIRS}" "${PROJECT_SOURCE_DIR}/third_party/NVTX/c/include" NO_DEFAULT_PATH)
find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir)
if(nvtx3_FOUND)
add_library(torch::nvtoolsext INTERFACE IMPORTED) add_library(torch::nvtoolsext INTERFACE IMPORTED)
target_include_directories(torch::nvtoolsext INTERFACE "${nvtx3_dir}") set_property(
else() TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES
message(WARNING "Cannot find NVTX3") CUDA::nvToolsExt)
endif()
# cublas # cublas
add_library(caffe2::cublas INTERFACE IMPORTED) add_library(caffe2::cublas INTERFACE IMPORTED)

View File

@ -189,6 +189,8 @@
# NCCL_INCLUDE_DIR # NCCL_INCLUDE_DIR
# specify where nccl is installed # specify where nccl is installed
# #
# NVTOOLSEXT_PATH (Windows only)
# specify where nvtoolsext is installed
# #
# ACL_ROOT_DIR # ACL_ROOT_DIR
# specify where Compute Library is installed # specify where Compute Library is installed

1
third_party/NVTX vendored

@ -1 +0,0 @@
Subproject commit e170594ac7cf1dac584da473d4ca9301087090c1

View File

@ -130,6 +130,7 @@ if(USE_CUDA)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cudnn) list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cudnn)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN) list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN)
endif() endif()
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext) list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
endif() endif()

View File

@ -80,6 +80,12 @@ if sys.platform == 'win32':
dll_paths = list(filter(os.path.exists, [th_dll_path, py_dll_path, base_py_dll_path])) dll_paths = list(filter(os.path.exists, [th_dll_path, py_dll_path, base_py_dll_path]))
if all(not os.path.exists(os.path.join(p, 'nvToolsExt64_1.dll')) for p in dll_paths):
nvtoolsext_dll_path = os.path.join(
os.getenv('NVTOOLSEXT_PATH', os.path.join(pfiles_path, 'NVIDIA Corporation', 'NvToolsExt')), 'bin', 'x64')
else:
nvtoolsext_dll_path = ''
from .version import cuda as cuda_version from .version import cuda as cuda_version
import glob import glob
if cuda_version and all(not glob.glob(os.path.join(p, 'cudart64*.dll')) for p in dll_paths): if cuda_version and all(not glob.glob(os.path.join(p, 'cudart64*.dll')) for p in dll_paths):
@ -90,7 +96,7 @@ if sys.platform == 'win32':
else: else:
cuda_path = '' cuda_path = ''
dll_paths.extend(filter(os.path.exists, [cuda_path])) dll_paths.extend(filter(os.path.exists, [nvtoolsext_dll_path, cuda_path]))
kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True) kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True)
with_load_library_flags = hasattr(kernel32, 'AddDllDirectory') with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')
@ -182,6 +188,7 @@ def _load_global_deps() -> None:
'cusolver': 'libcusolver.so.*[0-9]', 'cusolver': 'libcusolver.so.*[0-9]',
'cusparse': 'libcusparse.so.*[0-9]', 'cusparse': 'libcusparse.so.*[0-9]',
'nccl': 'libnccl.so.*[0-9]', 'nccl': 'libnccl.so.*[0-9]',
'nvtx': 'libnvToolsExt.so.*[0-9]',
} }
is_cuda_lib_err = [lib for lib in cuda_libs.values() if(lib.split('.')[0] in err.args[0])] is_cuda_lib_err = [lib for lib in cuda_libs.values() if(lib.split('.')[0] in err.args[0])]
if not is_cuda_lib_err: if not is_cuda_lib_err:

View File

@ -1,7 +1,7 @@
#ifdef _WIN32 #ifdef _WIN32
#include <wchar.h> // _wgetenv for nvtx #include <wchar.h> // _wgetenv for nvtx
#endif #endif
#include <nvtx3/nvToolsExt.h> #include <nvToolsExt.h>
#include <torch/csrc/utils/pybind.h> #include <torch/csrc/utils/pybind.h>
namespace torch::cuda::shared { namespace torch::cuda::shared {
@ -9,7 +9,7 @@ namespace torch::cuda::shared {
void initNvtxBindings(PyObject* module) { void initNvtxBindings(PyObject* module) {
auto m = py::handle(module).cast<py::module>(); auto m = py::handle(module).cast<py::module>();
auto nvtx = m.def_submodule("_nvtx", "nvtx3 bindings"); auto nvtx = m.def_submodule("_nvtx", "libNvToolsExt.so bindings");
nvtx.def("rangePushA", nvtxRangePushA); nvtx.def("rangePushA", nvtxRangePushA);
nvtx.def("rangePop", nvtxRangePop); nvtx.def("rangePop", nvtxRangePop);
nvtx.def("rangeStartA", nvtxRangeStartA); nvtx.def("rangeStartA", nvtxRangeStartA);

View File

@ -1,6 +1,6 @@
#include <sstream> #include <sstream>
#include <nvtx3/nvToolsExt.h> #include <nvToolsExt.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <c10/util/ApproximateClock.h> #include <c10/util/ApproximateClock.h>

View File

@ -647,7 +647,7 @@ CUDA_INCLUDE_MAP = collections.OrderedDict(
("cub/device/device_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), ("cub/device/device_reduce.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("cub/device/device_scan.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), ("cub/device/device_scan.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("cub/device/device_select.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)), ("cub/device/device_select.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
("nvtx3/nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)), ("nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)),
("nvml.h", ("rocm_smi/rocm_smi.h", CONV_INCLUDE, API_ROCMSMI)), ("nvml.h", ("rocm_smi/rocm_smi.h", CONV_INCLUDE, API_ROCMSMI)),
] ]
) )