mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Enable CUPTI for kineto by default on windows (#65608)
Summary: Retry of https://github.com/pytorch/pytorch/pull/62175 See https://github.com/pytorch/pytorch/pull/62175#issuecomment-926411151 for more information. malfet gdankel Pull Request resolved: https://github.com/pytorch/pytorch/pull/65608 Reviewed By: zou3519 Differential Revision: D31172530 Pulled By: gdankel fbshipit-source-id: 2c69ed0282c54fa6cdb6e604096d0370e230fd66
This commit is contained in:
parent
eca4f14b6c
commit
6b60884f12
|
|
@ -54,7 +54,7 @@ set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
|
||||||
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
|
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
|
||||||
set CUDNN_ROOT_DIR=%CUDA_PATH%
|
set CUDNN_ROOT_DIR=%CUDA_PATH%
|
||||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
||||||
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
|
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%CUDA_PATH%\extras\CUPTI\lib64;%PATH%
|
||||||
set NUMBAPRO_CUDALIB=%CUDA_PATH%\bin
|
set NUMBAPRO_CUDALIB=%CUDA_PATH%\bin
|
||||||
set NUMBAPRO_LIBDEVICE=%CUDA_PATH%\nvvm\libdevice
|
set NUMBAPRO_LIBDEVICE=%CUDA_PATH%\nvvm\libdevice
|
||||||
set NUMBAPRO_NVVM=%CUDA_PATH%\nvvm\bin\nvvm64_32_0.dll
|
set NUMBAPRO_NVVM=%CUDA_PATH%\nvvm\bin\nvvm64_32_0.dll
|
||||||
|
|
|
||||||
|
|
@ -36,13 +36,16 @@ if sys.platform == "win32":
|
||||||
cuda_version_1 = cuda_version.replace('.', '_')
|
cuda_version_1 = cuda_version.replace('.', '_')
|
||||||
cuda_path_var = 'CUDA_PATH_V' + cuda_version_1
|
cuda_path_var = 'CUDA_PATH_V' + cuda_version_1
|
||||||
default_path = 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v' + cuda_version
|
default_path = 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v' + cuda_version
|
||||||
cuda_path = os.path.join(os.getenv(cuda_path_var, default_path), 'bin')
|
cuda_base = os.getenv(cuda_path_var, default_path)
|
||||||
|
cuda_path = os.path.join(cuda_base, 'bin')
|
||||||
|
cupti_path = os.path.join(cuda_base, 'extras', 'CUPTI', 'lib64')
|
||||||
else:
|
else:
|
||||||
cuda_path = ''
|
cuda_path = ''
|
||||||
|
cupti_path = ''
|
||||||
|
|
||||||
import ctypes
|
import ctypes
|
||||||
kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True)
|
kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True)
|
||||||
dll_paths = list(filter(os.path.exists, [th_dll_path, py_dll_path, nvtoolsext_dll_path, cuda_path]))
|
dll_paths = list(filter(os.path.exists, [th_dll_path, py_dll_path, nvtoolsext_dll_path, cuda_path, cupti_path]))
|
||||||
with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')
|
with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')
|
||||||
prev_error_mode = kernel32.SetErrorMode(0x0001)
|
prev_error_mode = kernel32.SetErrorMode(0x0001)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1901,7 +1901,7 @@ if(USE_KINETO AND INTERN_BUILD_MOBILE AND USE_LITE_INTERPRETER_PROFILER AND (USE
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(USE_KINETO)
|
if(USE_KINETO)
|
||||||
if((NOT USE_CUDA) OR MSVC)
|
if(NOT USE_CUDA)
|
||||||
set(LIBKINETO_NOCUPTI ON CACHE STRING "" FORCE)
|
set(LIBKINETO_NOCUPTI ON CACHE STRING "" FORCE)
|
||||||
else()
|
else()
|
||||||
set(LIBKINETO_NOCUPTI OFF CACHE STRING "")
|
set(LIBKINETO_NOCUPTI OFF CACHE STRING "")
|
||||||
|
|
@ -1983,6 +1983,7 @@ if(USE_KINETO)
|
||||||
string(APPEND CMAKE_CXX_FLAGS " -DLIBKINETO_NOCUPTI")
|
string(APPEND CMAKE_CXX_FLAGS " -DLIBKINETO_NOCUPTI")
|
||||||
message(STATUS "Configured Kineto (CPU)")
|
message(STATUS "Configured Kineto (CPU)")
|
||||||
else()
|
else()
|
||||||
|
list(APPEND Caffe2_DEPENDENCY_LIBS ${CUDA_CUDART_LIBRARY})
|
||||||
message(STATUS "Configured Kineto")
|
message(STATUS "Configured Kineto")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import collections
|
||||||
import gc
|
import gc
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
|
import time
|
||||||
import os
|
import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
|
@ -587,8 +588,16 @@ class TestProfiler(TestCase):
|
||||||
assert is_int, "Invalid stacks record"
|
assert is_int, "Invalid stacks record"
|
||||||
|
|
||||||
@unittest.skipIf(not kineto_available(), "Kineto is required")
|
@unittest.skipIf(not kineto_available(), "Kineto is required")
|
||||||
@unittest.skipIf(IS_WINDOWS, "Test is flaky on Windows")
|
|
||||||
def test_tensorboard_trace_handler(self):
|
def test_tensorboard_trace_handler(self):
|
||||||
|
def delayed(func, time_to_sleep=0.005):
    """Wrap ``func`` so that every call sleeps for ``time_to_sleep`` seconds first.

    The payload in this test might be too small. tensorboard_trace_handler uses
    time.time() to generate a filename, so delaying each call avoids generating
    the same filename on Windows.

    Args:
        func: Callable to wrap (in this test, an ``on_trace_ready`` handler).
        time_to_sleep: Seconds to sleep before each call to ``func``.

    Returns:
        A wrapper that sleeps, forwards all positional and keyword arguments
        to ``func``, and returns whatever ``func`` returns.
    """
    # Local import: the file's top-level import block is not part of this span.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        time.sleep(time_to_sleep)
        # Propagate the result instead of silently discarding it.
        return func(*args, **kwargs)

    return wrapper
|
||||||
|
|
||||||
use_cuda = torch.profiler.ProfilerActivity.CUDA in supported_activities()
|
use_cuda = torch.profiler.ProfilerActivity.CUDA in supported_activities()
|
||||||
with _profile(use_cuda=use_cuda, use_kineto=True):
|
with _profile(use_cuda=use_cuda, use_kineto=True):
|
||||||
self.payload(use_cuda=use_cuda)
|
self.payload(use_cuda=use_cuda)
|
||||||
|
|
@ -605,7 +614,7 @@ class TestProfiler(TestCase):
|
||||||
warmup=1,
|
warmup=1,
|
||||||
active=2,
|
active=2,
|
||||||
repeat=3),
|
repeat=3),
|
||||||
on_trace_ready=torch.profiler.tensorboard_trace_handler(dname)
|
on_trace_ready=delayed(torch.profiler.tensorboard_trace_handler(dname))
|
||||||
) as p:
|
) as p:
|
||||||
for _ in range(18):
|
for _ in range(18):
|
||||||
self.payload(use_cuda=use_cuda)
|
self.payload(use_cuda=use_cuda)
|
||||||
|
|
@ -634,7 +643,7 @@ class TestProfiler(TestCase):
|
||||||
warmup=1,
|
warmup=1,
|
||||||
active=2,
|
active=2,
|
||||||
repeat=3),
|
repeat=3),
|
||||||
on_trace_ready=torch.profiler.tensorboard_trace_handler(dname, use_gzip=True)
|
on_trace_ready=delayed(torch.profiler.tensorboard_trace_handler(dname, use_gzip=True))
|
||||||
)
|
)
|
||||||
p.start()
|
p.start()
|
||||||
for _ in range(18):
|
for _ in range(18):
|
||||||
|
|
|
||||||
|
|
@ -78,11 +78,14 @@ if sys.platform == 'win32':
|
||||||
cuda_version_1 = cuda_version.replace('.', '_')
|
cuda_version_1 = cuda_version.replace('.', '_')
|
||||||
cuda_path_var = 'CUDA_PATH_V' + cuda_version_1
|
cuda_path_var = 'CUDA_PATH_V' + cuda_version_1
|
||||||
default_path = os.path.join(pfiles_path, 'NVIDIA GPU Computing Toolkit', 'CUDA', 'v' + cuda_version)
|
default_path = os.path.join(pfiles_path, 'NVIDIA GPU Computing Toolkit', 'CUDA', 'v' + cuda_version)
|
||||||
cuda_path = os.path.join(os.getenv(cuda_path_var, default_path), 'bin')
|
cuda_base = os.getenv(cuda_path_var, default_path)
|
||||||
|
cuda_path = os.path.join(cuda_base, 'bin')
|
||||||
|
cupti_path = os.path.join(cuda_base, 'extras', 'CUPTI', 'lib64')
|
||||||
else:
|
else:
|
||||||
cuda_path = ''
|
cuda_path = ''
|
||||||
|
cupti_path = ''
|
||||||
|
|
||||||
dll_paths.extend(filter(os.path.exists, [nvtoolsext_dll_path, cuda_path]))
|
dll_paths.extend(filter(os.path.exists, [nvtoolsext_dll_path, cuda_path, cupti_path]))
|
||||||
|
|
||||||
kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True)
|
kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True)
|
||||||
with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')
|
with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user