diff --git a/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat b/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat index 69a5c5a586c..ea0a48154fa 100644 --- a/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat +++ b/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat @@ -54,7 +54,7 @@ set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64 set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH% set CUDNN_ROOT_DIR=%CUDA_PATH% set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt -set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%CUDA_PATH%\extras\CUPTI\lib64;%PATH% +set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH% set NUMBAPRO_CUDALIB=%CUDA_PATH%\bin set NUMBAPRO_LIBDEVICE=%CUDA_PATH%\nvvm\libdevice set NUMBAPRO_NVVM=%CUDA_PATH%\nvvm\bin\nvvm64_32_0.dll diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 7bc554f2e8a..ca560288a41 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1901,7 +1901,7 @@ if(USE_KINETO AND INTERN_BUILD_MOBILE AND USE_LITE_INTERPRETER_PROFILER AND (USE endif() if(USE_KINETO) - if(NOT USE_CUDA) + if((NOT USE_CUDA) OR MSVC) set(LIBKINETO_NOCUPTI ON CACHE STRING "" FORCE) else() set(LIBKINETO_NOCUPTI OFF CACHE STRING "") @@ -1983,7 +1983,6 @@ if(USE_KINETO) string(APPEND CMAKE_CXX_FLAGS " -DLIBKINETO_NOCUPTI") message(STATUS "Configured Kineto (CPU)") else() - list(APPEND Caffe2_DEPENDENCY_LIBS ${CUDA_CUDART_LIBRARY}) message(STATUS "Configured Kineto") endif() endif() diff --git a/test/test_profiler.py b/test/test_profiler.py index d1f6bba1b92..8b9428ec41f 100644 --- a/test/test_profiler.py +++ b/test/test_profiler.py @@ -2,7 +2,6 @@ import collections import gc import io import json -import time import os import unittest @@ -588,16 +587,8 @@ class TestProfiler(TestCase): assert is_int, "Invalid stacks record" @unittest.skipIf(not kineto_available(), "Kineto is required") + @unittest.skipIf(IS_WINDOWS, "Test is flaky on Windows") def test_tensorboard_trace_handler(self): - def delayed(func, time_to_sleep=0.005): - """"The payload in this test might be too small. tensorboard_trace_handler use time.time() - to generate a filename. Delaying it to avoid generate the same filename on Windows. - """ - def wrapper(*args, **kwargs): - time.sleep(time_to_sleep) - func(*args, **kwargs) - return wrapper - use_cuda = torch.profiler.ProfilerActivity.CUDA in supported_activities() with _profile(use_cuda=use_cuda, use_kineto=True): self.payload(use_cuda=use_cuda) @@ -614,7 +605,7 @@ class TestProfiler(TestCase): warmup=1, active=2, repeat=3), - on_trace_ready=delayed(torch.profiler.tensorboard_trace_handler(dname)) + on_trace_ready=torch.profiler.tensorboard_trace_handler(dname) ) as p: for _ in range(18): self.payload(use_cuda=use_cuda) @@ -643,7 +634,7 @@ class TestProfiler(TestCase): warmup=1, active=2, repeat=3), - on_trace_ready=delayed(torch.profiler.tensorboard_trace_handler(dname, use_gzip=True)) + on_trace_ready=torch.profiler.tensorboard_trace_handler(dname, use_gzip=True) ) p.start() for _ in range(18): diff --git a/torch/__init__.py b/torch/__init__.py index 492ed456531..5d02330a5c9 100644 --- a/torch/__init__.py +++ b/torch/__init__.py @@ -78,14 +78,11 @@ if sys.platform == 'win32': cuda_version_1 = cuda_version.replace('.', '_') cuda_path_var = 'CUDA_PATH_V' + cuda_version_1 default_path = os.path.join(pfiles_path, 'NVIDIA GPU Computing Toolkit', 'CUDA', 'v' + cuda_version) - cuda_base = os.getenv(cuda_path_var, default_path) - cuda_path = os.path.join(cuda_base, 'bin') - cupti_path = os.path.join(cuda_base, 'extras', 'CUPTI', 'lib64') + cuda_path = os.path.join(os.getenv(cuda_path_var, default_path), 'bin') else: cuda_path = '' - cupti_path = '' - dll_paths.extend(filter(os.path.exists, [nvtoolsext_dll_path, cuda_path, cupti_path])) + dll_paths.extend(filter(os.path.exists, [nvtoolsext_dll_path, cuda_path])) kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True) with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')