mirror of https://github.com/zebrajr/pytorch.git (synced 2025-12-06 12:20:52 +01:00)
Revert "Nvfuser code removal (#111093)"
This reverts commit 572628e520.
Reverted https://github.com/pytorch/pytorch/pull/111093 on behalf of https://github.com/jeanschmidt because it breaks internal builds; @albanD, please help the author with the next steps to get this diff merged ([comment](https://github.com/pytorch/pytorch/pull/111093#issuecomment-1771434853))
This commit is contained in:
parent ca5f6f7af3
commit 715dfced72
@@ -615,7 +615,7 @@ test_libtorch_jit() {
   # Run jit and lazy tensor cpp tests together to finish them faster
   if [[ "$BUILD_ENVIRONMENT" == *cuda* && "$TEST_CONFIG" != *nogpu* ]]; then
-    LTC_TS_CUDA=1 python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/test_lazy
+    LTC_TS_CUDA=1 python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/nvfuser_tests cpp/test_lazy
   else
     # CUDA tests have already been skipped when CUDA is not available
     python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/test_lazy -k "not CUDA"
@@ -197,6 +197,9 @@ option(USE_TSAN "Use Thread Sanitizer" OFF)
 option(USE_CUDA "Use CUDA" ON)
 cmake_dependent_option(
     BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
+cmake_dependent_option(
+    BUILD_NVFUSER "Build NVFUSER" ON
+    "USE_CUDA OR USE_ROCM" OFF)
 cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
 option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
 cmake_dependent_option(
@@ -1203,6 +1206,19 @@ if(BUILD_JNI)
   add_subdirectory(android/pytorch_android)
 endif()
 
+if(NOT USE_CUDA AND NOT USE_ROCM)
+  set(BUILD_NVFUSER OFF CACHE BOOL "BUILD nvfuser" FORCE)
+endif()
+
+if(BUILD_NVFUSER)
+  if(DEFINED ENV{NVFUSER_SOURCE_DIR})
+    add_subdirectory($ENV{NVFUSER_SOURCE_DIR} nvfuser)
+  else()
+    add_subdirectory(third_party/nvfuser nvfuser)
+  endif()
+  add_compile_definitions(BUILD_NVFUSER)
+endif()
+
 include(cmake/Summary.cmake)
 caffe2_print_configuration_summary()
 
@@ -255,6 +255,7 @@ core_sources_full_mobile_no_backend_interface_xplat = [
     "torch/csrc/jit/passes/constant_propagation.cpp",
     "torch/csrc/jit/passes/restore_mutation.cpp",
     "torch/csrc/jit/passes/create_autodiff_subgraphs.cpp",
+    "torch/csrc/jit/passes/cuda_graph_fuser.cpp",
     "torch/csrc/jit/passes/dead_code_elimination.cpp",
     "torch/csrc/jit/passes/eliminate_no_ops.cpp",
     "torch/csrc/jit/passes/remove_redundant_profiles.cpp",
setup.py (44 changes)
@@ -186,6 +186,9 @@
 #   NCCL_INCLUDE_DIR
 #     specify where nccl is installed
 #
+#   NVFUSER_SOURCE_DIR
+#     specify nvfuser root directory
+#
 #   NVTOOLSEXT_PATH (Windows only)
 #     specify where nvtoolsext is installed
 #
@@ -626,6 +629,11 @@ class build_ext(setuptools.command.build_ext.build_ext):
         else:
             report("-- Not using ITT")
 
+        if cmake_cache_vars["BUILD_NVFUSER"]:
+            report("-- Building nvfuser")
+        else:
+            report("-- Not Building nvfuser")
+
         # Do not use clang to compile extensions if `-fstack-clash-protection` is defined
         # in system CFLAGS
         c_flags = str(os.getenv("CFLAGS", ""))
@@ -725,6 +733,22 @@ class build_ext(setuptools.command.build_ext.build_ext):
                     os.makedirs(dst_dir)
                 self.copy_file(src, dst)
 
+        # Copy nvfuser extension
+        for i, ext in enumerate(self.extensions):
+            if ext.name != "nvfuser._C":
+                continue
+            fullname = self.get_ext_fullname(ext.name)
+            filename = self.get_ext_filename(fullname)
+            fileext = os.path.splitext(filename)[1]
+            src = os.path.join(os.path.dirname(filename), "nvfuser" + fileext)
+            dst = os.path.join(os.path.realpath(self.build_lib), filename)
+            if os.path.exists(src):
+                report(f"Copying {ext.name} from {src} to {dst}")
+                dst_dir = os.path.dirname(dst)
+                if not os.path.exists(dst_dir):
+                    os.makedirs(dst_dir)
+                self.copy_file(src, dst)
+
         setuptools.command.build_ext.build_ext.build_extensions(self)
 
     def get_outputs(self):
@@ -990,6 +1014,8 @@ def configure_extension_build():
         excludes.extend(["caffe2", "caffe2.*"])
     if not cmake_cache_vars["BUILD_FUNCTORCH"]:
         excludes.extend(["functorch", "functorch.*"])
+    if not cmake_cache_vars["BUILD_NVFUSER"]:
+        excludes.extend(["nvfuser", "nvfuser.*"])
     packages = find_packages(exclude=excludes)
     C = Extension(
         "torch._C",
@@ -1023,6 +1049,10 @@ def configure_extension_build():
         extensions.append(
             Extension(name="functorch._C", sources=[]),
         )
+    if cmake_cache_vars["BUILD_NVFUSER"]:
+        extensions.append(
+            Extension(name="nvfuser._C", sources=[]),
+        )
 
     cmdclass = {
         "bdist_wheel": wheel_concatenate,
@@ -1284,6 +1314,8 @@ def main():
         "include/torch/csrc/jit/tensorexpr/*.h",
         "include/torch/csrc/jit/tensorexpr/operators/*.h",
         "include/torch/csrc/jit/codegen/cuda/*.h",
+        "include/torch/csrc/jit/codegen/cuda/ops/*.h",
+        "include/torch/csrc/jit/codegen/cuda/scheduler/*.h",
         "include/torch/csrc/onnx/*.h",
         "include/torch/csrc/profiler/*.h",
         "include/torch/csrc/profiler/orchestration/*.h",
@@ -1325,6 +1357,18 @@ def main():
         "utils/model_dump/code.js",
         "utils/model_dump/*.mjs",
     ]
+    if get_cmake_cache_vars()["BUILD_NVFUSER"]:
+        torch_package_data.extend(
+            [
+                "share/cmake/nvfuser/*.cmake",
+                "include/nvfuser/*.h",
+                "include/nvfuser/kernel_db/*.h",
+                "include/nvfuser/multidevice/*.h",
+                "include/nvfuser/ops/*.h",
+                "include/nvfuser/python_frontend/*.h",
+                "include/nvfuser/scheduler/*.h",
+            ]
+        )
 
     if get_cmake_cache_vars()["BUILD_CAFFE2"]:
         torch_package_data.extend(
@@ -14,10 +14,43 @@ namespace jit {
 namespace fuser {
 namespace cuda {
 
+class LoadingNvfuserLibrary {
+ public:
+#ifdef USE_CUDA
+  LoadingNvfuserLibrary() {
+    std::string library_name;
+    if (const char* path = std::getenv("TORCH_NVFUSER_LIBRARY_PATH")) {
+      library_name = path;
+    }
+#if defined(_WIN32)
+    library_name += "nvfuser_codegen.dll";
+#elif defined(__APPLE__)
+    library_name += "libnvfuser_codegen.dylib";
+#else
+    library_name += "libnvfuser_codegen.so";
+#endif
+    try {
+      // NOTE: we need to refactor this to a lazy load instead. We could end up
+      // with double de-allocation with our python API loading the library.
+      // Leaking the handle should solve the problem for now
+      nvfuserLib_ = std::make_shared<at::DynamicLibrary>(
+          library_name.c_str(), nullptr, true);
+    } catch (const c10::DynamicLibraryError& e) {
+#if defined(BUILD_NVFUSER) || !defined(NDEBUG)
+      TORCH_WARN_ONCE("Loading nvfuser library failed with: ", e.msg());
+#endif
+    }
+  }
+
+#endif // USE_CUDA
+  std::shared_ptr<at::DynamicLibrary> nvfuserLib_;
+};
+
+static LoadingNvfuserLibrary loading_nvfuser_library_;
+
 static std::atomic<bool> cuda_fusion_guard_mode{true};
 
 bool isEnabled() {
-  TORCH_WARN_ONCE("torch::jit::fuser::cuda::isEnabled() is deprecated");
   return false;
 }
 
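The constructor above resolves the nvfuser codegen library at static-initialization time and deliberately leaks the handle. A minimal standalone sketch of the same at::DynamicLibrary load-and-leak pattern, with a placeholder library name that is not part of this commit:

    #include <ATen/DynamicLibrary.h>
    #include <c10/util/Exception.h>

    #include <memory>

    // Open a shared library with leak_handle=true so the handle is never
    // dlclose'd, and downgrade a load failure to a one-time warning.
    // "libexample_codegen.so" is a placeholder, not a real PyTorch library.
    std::shared_ptr<at::DynamicLibrary> tryLoadCodegenLibrary() {
      try {
        return std::make_shared<at::DynamicLibrary>(
            "libexample_codegen.so", /*alt_name=*/nullptr, /*leak_handle=*/true);
      } catch (const c10::DynamicLibraryError& e) {
        TORCH_WARN_ONCE("Loading codegen library failed with: ", e.msg());
        return nullptr;
      }
    }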
@@ -7,6 +7,7 @@
 #include <c10/util/Optional.h>
 #include <torch/csrc/jit/ir/ir.h>
 #include <torch/csrc/jit/jit_log.h>
+#include <torch/csrc/jit/passes/cuda_graph_fuser.h>
 #include <torch/csrc/jit/passes/quantization/helper.h>
 
 #include <stack>
torch/csrc/jit/passes/cuda_graph_fuser.cpp (new file, 21 lines)
@@ -0,0 +1,21 @@
+#include <torch/csrc/jit/passes/cuda_graph_fuser.h>
+#include <mutex>
+
+namespace torch {
+namespace jit {
+
+static CudaFuserComparisonCallback comparison_callback = {false, nullptr};
+static std::mutex comparison_callback_lock;
+
+CudaFuserComparisonCallback getCudaFuserComparisonCallback() {
+  std::lock_guard<std::mutex> guard(comparison_callback_lock);
+  return comparison_callback;
+}
+
+void setCudaFuserComparisonCallback(CudaFuserComparisonCallback callback) {
+  std::lock_guard<std::mutex> guard(comparison_callback_lock);
+  comparison_callback = callback;
+}
+
+} // namespace jit
+} // namespace torch
torch/csrc/jit/passes/cuda_graph_fuser.h (new file, 42 lines)
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <ATen/Context.h>
+#include <torch/csrc/jit/codegen/cuda/interface.h>
+#include <torch/csrc/jit/ir/ir.h>
+#include <torch/csrc/jit/passes/pass_manager.h>
+#include <string>
+#include <utility>
+
+namespace torch {
+namespace jit {
+
+// Register CudaFuseGraph in custom passes
+struct TORCH_API RegisterCudaFuseGraph
+    : public PassManager<RegisterCudaFuseGraph> {
+  static bool registerPass(bool enabled) {
+    TORCH_WARN(
+        "RegisterCudaFuseGraph::registerPass() is deprecated. "
+        "Please use torch::jit::fuser::cuda::setEnabled().");
+    return fuser::cuda::setEnabled(enabled);
+  }
+
+  static bool isRegistered() {
+    TORCH_WARN(
+        "RegisterCudaFuseGraph::isRegistered() is deprecated. "
+        "Please use torch::jit::fuser::cuda::isEnabled().");
+    return fuser::cuda::isEnabled();
+  }
+};
+
+struct CudaFuserComparisonCallback {
+  using callback_type =
+      std::function<void(const Stack&, const Stack&, const std::string&)>;
+  bool run_fallback;
+  callback_type callback;
+};
+
+TORCH_API CudaFuserComparisonCallback getCudaFuserComparisonCallback();
+TORCH_API void setCudaFuserComparisonCallback(CudaFuserComparisonCallback);
+
+} // namespace jit
+} // namespace torch
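The restored header gives external callers two entry points: the deprecated RegisterCudaFuseGraph shims, which forward to torch::jit::fuser::cuda::setEnabled()/isEnabled(), and the comparison-callback pair. As a usage illustration (not part of this commit; the function name and lambda body are made up for the sketch), a caller could install a callback and turn the fuser on roughly like this:

    #include <torch/csrc/jit/passes/cuda_graph_fuser.h>

    #include <iostream>
    #include <string>

    void installNvfuserComparisonCallback() {
      torch::jit::CudaFuserComparisonCallback cb;
      // Request that the unfused fallback also run (inferred from the field name).
      cb.run_fallback = true;
      // Callback receives two output stacks and a string form of the graph;
      // the semantics here are inferred from the struct declared above.
      cb.callback = [](const torch::jit::Stack& fused_outputs,
                       const torch::jit::Stack& fallback_outputs,
                       const std::string& graph_str) {
        std::cout << "nvfuser comparison ran for graph:\n" << graph_str << "\n";
      };
      torch::jit::setCudaFuserComparisonCallback(cb);
      // Preferred over the deprecated RegisterCudaFuseGraph::registerPass(true).
      torch::jit::fuser::cuda::setEnabled(true);
    }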
@@ -857,6 +857,11 @@ class TensorExprFuser {
     if (device->is_cpu()) {
       return canFuseOnCPU();
     } else if (device->is_cuda()) {
+#ifndef C10_MOBILE
+      if (fuser::cuda::isEnabled()) {
+        return false;
+      }
+#endif
       return canFuseOnGPU();
     } else if (device->is_xpu()) {
       return false;
@@ -27,6 +27,7 @@
 #include <torch/csrc/jit/passes/constant_propagation.h>
 #include <torch/csrc/jit/passes/create_autodiff_subgraphs.h>
 #include <torch/csrc/jit/passes/create_functional_graphs.h>
+#include <torch/csrc/jit/passes/cuda_graph_fuser.h>
 #include <torch/csrc/jit/passes/dbr_quantization/remove_redundant_aliases.h>
 #include <torch/csrc/jit/passes/dead_code_elimination.h>
 #include <torch/csrc/jit/passes/decompose_ops.h>
@@ -14,6 +14,7 @@
 #include <torch/csrc/jit/passes/constant_pooling.h>
 #include <torch/csrc/jit/passes/constant_propagation.h>
 #include <torch/csrc/jit/passes/create_autodiff_subgraphs.h>
+#include <torch/csrc/jit/passes/cuda_graph_fuser.h>
 #include <torch/csrc/jit/passes/dead_code_elimination.h>
 #include <torch/csrc/jit/passes/decompose_ops.h>
 #include <torch/csrc/jit/passes/graph_fuser.h>
@@ -645,6 +646,13 @@ const ExecutionPlan& ProfilingGraphExecutorImpl::getOptimizedPlanFor(
     // before any other pass that could insert `prim::iprofile_value` node on
     // `aten::_grad_sum_to_size` input.
     InsertProfileNodesForSpecializeAutogradZero(pr_.get());
+    // `InsertProfileNodesForCUDAFuser` inserts profile node for non-tensor
+    // value
+#ifndef C10_MOBILE
+    if (torch::jit::fuser::cuda::isEnabled()) {
+      torch::jit::fuser::cuda::InsertProfileNodesForCUDAFuser(pr_.get());
+    }
+#endif
     GRAPH_DUMP("Profiled Graph: ", pr_->graph());
     profiling_plan_ = ExecutionPlan(pr_->graph(), function_name_);
     // fall-through
@@ -207,7 +207,13 @@ void ProfilingRecord::insertShapeProfile(
 }
 
 static bool needsProfiledInputs(Node* n) {
-  if (tensorexpr::isSupported(n)) {
+  if (tensorexpr::isSupported(n) ||
+#ifndef C10_MOBILE
+      (fuser::cuda::isEnabled() && fuser::cuda::profileNode(n))
+#else
+      false
+#endif
+  ) {
     return true;
   }
 
@@ -238,7 +244,13 @@ static bool needsProfiledInputs(Node* n) {
 }
 
 static bool needsProfiledOutput(Node* n) {
-  if (tensorexpr::isSupported(n)) {
+  if (tensorexpr::isSupported(n) ||
+#ifndef C10_MOBILE
+      (fuser::cuda::isEnabled() && fuser::cuda::profileNode(n))
+#else
+      false
+#endif
+  ) {
     return true;
   }
 