Fixes jiterator cache macro include + updates CUDA note with cache variables (#71452)
Summary:
Per title.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/71452
Reviewed By: ngimel
Differential Revision: D33646495
Pulled By: mruberry
fbshipit-source-id: bbf627e6d7a724a83a3ea2ae9c0f50430f8d578e
(cherry picked from commit d1e72b144a)
Parent: 125bdb6d51
Commit: 9b9b878c89
@@ -14,4 +14,6 @@
 // Currently unsupported on Windows
 #ifndef _WIN32
 #define BUILD_JITERATOR_WITH_CACHE true
+#else
+#define BUILD_JITERATOR_WITH_CACHE false
 #endif // _WIN32

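Because BUILD_JITERATOR_WITH_CACHE is now always defined, to true off Windows and to false on Windows, code that gates on it has to test its value with #if rather than its mere presence with #ifdef; an #ifdef test passes even when the macro expands to false, which is the mistake the later hunks correct. A minimal standalone sketch of the difference (illustrative only, not part of the patch; the macro name is reused just for the demonstration):

// ifdef_vs_if_sketch.cpp -- illustrative only
#include <cstdio>

// Mirrors the jit_macros.h pattern on an unsupported platform:
// the macro exists, but its value is false.
#define BUILD_JITERATOR_WITH_CACHE false

int main() {
#ifdef BUILD_JITERATOR_WITH_CACHE
  // Compiled in even though the macro is false: #ifdef only checks existence.
  std::printf("#ifdef: cache code included\n");
#endif

#if BUILD_JITERATOR_WITH_CACHE
  std::printf("#if: cache enabled\n");
#else
  // This branch is taken: #if evaluates the macro's value.
  std::printf("#if: cache disabled\n");
#endif
  return 0;
}
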
@@ -3,6 +3,7 @@
 #include <c10/util/hash.h>
 #include <c10/util/Optional.h>
 #include <c10/cuda/CUDACachingAllocator.h>
+#include <ATen/jit_macros.h>
 #include <ATen/cuda/CUDAContext.h>
 #include <ATen/cuda/detail/OffsetCalculator.cuh>
 #include <ATen/code_template.h>

@@ -727,7 +728,7 @@ std::string generate_code(
 }


-#ifdef BUILD_JITERATOR_WITH_CACHE
+#if BUILD_JITERATOR_WITH_CACHE
 // Acquires (possibly creating) the kernel cache directory
 c10::optional<std::string> get_cache_dir() {
   // If the environment variable USE_TORCH_KERNEL_CACHE is set to "0" then no persistent cache is used

@@ -813,7 +814,7 @@ NvrtcFunction jit_pwise_function(
   NvrtcFunction compiled_kernel_;
   std::string name = kernel_name + "_kernel";

-#ifdef BUILD_JITERATOR_WITH_CACHE
+#if BUILD_JITERATOR_WITH_CACHE
   static const c10::optional<std::string> cache_dir = get_cache_dir();

   std::string file_path;

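In the hunk above, cache_dir is a function-local static, so get_cache_dir() runs only the first time this point is reached in a process; C++11 guarantees that such initialization happens exactly once even with concurrent callers. A small standalone sketch of that behavior (names and paths are illustrative, not taken from the patch):

// static_init_sketch.cpp -- illustrative only
#include <cstdio>
#include <optional>
#include <string>

std::optional<std::string> expensive_lookup() {
  std::printf("expensive_lookup() called\n");  // printed once per process
  return std::string("/tmp/kernels");
}

void use_cache() {
  // Initialized on first call only; later calls (and concurrent threads)
  // reuse the already-initialized value ("magic statics", C++11).
  static const std::optional<std::string> cache_dir = expensive_lookup();
  if (cache_dir) {
    std::printf("cache dir: %s\n", cache_dir->c_str());
  }
}

int main() {
  use_cache();
  use_cache();  // expensive_lookup() is NOT called again
  return 0;
}
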
@@ -927,7 +928,7 @@ NvrtcFunction jit_pwise_function(
   // TODO: use guards to avoid leaking
   AT_CUDA_NVRTC_CHECK(nvrtc.nvrtcDestroyProgram(&program));

-#ifdef BUILD_JITERATOR_WITH_CACHE
+#if BUILD_JITERATOR_WITH_CACHE
   if (cache_dir.has_value()) {
     // Writes the program to the cache if caching
     // NOTE: Actually writes to a per-process temporary file to avoid multi-process contention.

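The per-process temporary file mentioned in the NOTE above is a standard way to avoid torn writes when several processes share a cache directory: each writer produces its own temporary file and then renames it into place. A rough, self-contained sketch of that pattern (function and file names are hypothetical, not PyTorch's actual code):

// cache_write_sketch.cpp -- illustrative only, not the PyTorch implementation
#include <cstdio>    // std::rename, std::remove
#include <fstream>
#include <string>
#include <unistd.h>  // getpid() (POSIX; the kernel cache is non-Windows anyway)

// Writes `blob` to `final_path` by first writing a file unique to this
// process and then renaming it into place. On POSIX filesystems the rename
// is atomic, so concurrent writers never expose a partially written file.
bool write_cache_entry(const std::string& final_path, const std::string& blob) {
  const std::string tmp_path = final_path + ".tmp." + std::to_string(getpid());
  {
    std::ofstream out(tmp_path, std::ios::binary);
    if (!out) {
      return false;
    }
    out.write(blob.data(), static_cast<std::streamsize>(blob.size()));
    if (!out) {
      std::remove(tmp_path.c_str());
      return false;
    }
  }  // out is flushed and closed here
  if (std::rename(tmp_path.c_str(), final_path.c_str()) != 0) {
    std::remove(tmp_path.c_str());
    return false;
  }
  return true;
}
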
@@ -393,6 +393,25 @@ object or a device index, and access one of the above attributes. E.g., to set
 the capacity of the cache for device ``1``, one can write
 ``torch.backends.cuda.cufft_plan_cache[1].max_size = 10``.

+.. _cuda-just-in-time-compilation:
+
+Just-in-Time Compilation
+------------------------
+
+PyTorch just-in-time compiles some operations, like torch.special.zeta, when
+performed on CUDA tensors. This compilation can be time consuming
+(up to a few seconds depending on your hardware and software)
+and may occur multiple times for a single operator since many PyTorch operators actually
+select from a variety of kernels, each of which must be compiled once, depending on their input.
+This compilation occurs once per process, or just once if a kernel cache is used.
+
+By default, PyTorch creates a kernel cache in $XDG_CACHE_HOME/torch/kernels if
+XDG_CACHE_HOME is defined and $HOME/.cache/torch/kernels if it's not (except on Windows,
+where the kernel cache is not yet supported). The caching behavior can be directly
+controlled with two environment variables. If USE_PYTORCH_KERNEL_CACHE is set to 0 then no
+cache will be used, and if PYTORCH_KERNEL_CACHE_PATH is set then that path will be used
+as a kernel cache instead of the default location.
+
 Best practices
 --------------

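The note added above describes where the kernel cache lives and which environment variables control it. As a rough illustration of that lookup, here is a hedged C++ sketch of the documented behavior; it is not PyTorch's implementation, the function name is hypothetical, and the exact precedence between USE_PYTORCH_KERNEL_CACHE and PYTORCH_KERNEL_CACHE_PATH is an assumption on my part:

// cache_dir_sketch.cpp -- illustrative sketch of the documented lookup order
#include <cstdlib>   // std::getenv
#include <optional>
#include <string>

// Resolves the kernel cache directory as the note describes:
// disabled entirely, explicit override, XDG default, then HOME fallback.
std::optional<std::string> resolve_kernel_cache_dir() {
  const char* use_cache = std::getenv("USE_PYTORCH_KERNEL_CACHE");
  if (use_cache != nullptr && std::string(use_cache) == "0") {
    return std::nullopt;  // caching disabled entirely
  }
  const char* explicit_path = std::getenv("PYTORCH_KERNEL_CACHE_PATH");
  if (explicit_path != nullptr) {
    return std::string(explicit_path);  // user-specified cache location
  }
  const char* xdg = std::getenv("XDG_CACHE_HOME");
  if (xdg != nullptr) {
    return std::string(xdg) + "/torch/kernels";
  }
  const char* home = std::getenv("HOME");
  if (home != nullptr) {
    return std::string(home) + "/.cache/torch/kernels";
  }
  return std::nullopt;  // no usable location found
}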