This PR adds support for tracing autograd.Function with grad. A few important bullet points outlining our approach:

1) Our goal is to verify soundness in order to add a call_function to the autograd.Function's `apply` into the graph.
2) We achieve (1) by verifying that both the forward and backward of the autograd.Function are sound, and rejecting the function otherwise.
3) For the forward, if we verify soundness, we install its guards into the graph.
4) For the backward, if we verify soundness, we throw the trace out. Backward soundness verification is more onerous, however, and has a config-driven set of banned attrs and methods for tensors.

1-4 above are achieved by turning the forward and backward into UserDefinedFunctionVariables and inlining through them, relying on dynamo's soundness detection. If we graph break inside either of them, we raise and treat the function as unsound. As noted above, the backward is stricter yet.

For the tracing, the safety comes from dynamo's HigherOrderOperator system. That system ensures not only that we trace soundly, but also that no new variables are lifted into inputs during the tracing, and that the forward and backward are entirely self contained. Whenever we reject a function as unsound, we restore back, as usual.

Due to some limitations in the lifting logic, we implemented an escape hatch for tensors that are known in the forward but cross into the backward through save_for_backward (save) / saved_tensors (load). We escape hatch here to avoid having the known saved tensors coming from the forward accidentally treated as lifted variables (and rejected). This is sound, but a little hacky feeling.

Additionally, due to some limitations in fx node removal, combined with how we produce subgraphs for the traces installed from HigherOrderOperators, we had to improve our node removal logic. In the event of a restore, we remove the old nodes from the graph, as usual in dynamo. However, because references to these nodes may exist in subgraphs, we first traverse each node's users and remove those users if and only if they live in another graph. This is always sound, because removal should only be downstream of restoration at this point.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/99483
Approved by: https://github.com/zou3519
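Below is a minimal sketch (illustrative only, not taken from the PR or its tests) of the kind of user code this enables: an autograd.Function whose `apply` can be captured in the graph when `capture_autograd_function` (defined in the config file below) is enabled, rather than always forcing a graph break. Whether a particular function is actually captured depends on the soundness checks described above.

import torch


class Scale(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, scale):
        # Tensors saved here cross from forward into backward via saved_tensors,
        # exercising the escape hatch described in the PR.
        ctx.save_for_backward(x)
        ctx.scale = scale
        return x * scale

    @staticmethod
    def backward(ctx, grad_out):
        (x,) = ctx.saved_tensors  # unused here; shown only for the save/load path
        return grad_out * ctx.scale, None


@torch.compile(backend="eager")
def fn(x):
    return Scale.apply(x, 2.0).sum()


x = torch.randn(4, requires_grad=True)
fn(x).backward()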
281 lines · 11 KiB · Python
import inspect
import os
import re
import sys
import tempfile
from os.path import abspath, dirname

import torch

from . import external_utils

# to configure logging for dynamo, aot, and inductor
# use the following API in the torch._logging module
# torch._logging.set_logs(dynamo=<level>, aot=<level>, inductor=<level>)
# or use the environment variable TORCH_LOGS="dynamo,aot,inductor" (use a prefix + to indicate higher verbosity)
# see this design doc for more detailed info
# Design doc: https://docs.google.com/document/d/1ZRfTWKa8eaPq1AxaiHrq4ASTPouzzlPiuquSBEJYwS8/edit#

# the name of a file to write the logs to
log_file_name = None

# Verbose will print full stack traces on warnings and errors
verbose = os.environ.get("TORCHDYNAMO_VERBOSE", "0") == "1"

# verify the correctness of optimized backend
verify_correctness = False

# need this many ops to create an FX graph
minimum_call_count = 1

# turn on/off DCE pass
dead_code_elimination = True

# disable (for a function) when cache reaches this size
cache_size_limit = 64

# whether or not to specialize on int inputs. This only has an effect with
# dynamic_shapes; when dynamic_shapes is False, we ALWAYS specialize on int
# inputs
specialize_int = False

# Assume these functions return constants
constant_functions = {
    torch.jit.is_scripting: False,
    torch.jit.is_tracing: False,
    torch._C._get_tracing_state: None,
    torch.fx._symbolic_trace.is_fx_tracing: False,
    torch.onnx.is_in_onnx_export: False,
    external_utils.is_compiling: True,
    torch._utils.is_compiling: True,
}

# don't specialize on shapes and strides and put shape ops in graph
dynamic_shapes = os.environ.get("TORCHDYNAMO_DYNAMIC_SHAPES") == "1"

# This is a temporary flag, which changes the behavior of dynamic_shapes=True.
# When assume_static_by_default is True, we only allocate symbols for shapes marked dynamic via mark_dynamic.
# NOTE - this flag can be removed once we can run dynamic_shapes=False w/ the mark_dynamic API
# see [Note - on the state of mark_dynamic]
assume_static_by_default = True

# This flag changes how dynamic_shapes=True works, and is meant to be used in conjunction
# with assume_static_by_default=True.
# With this flag enabled, we always compile a frame as fully static for the first time, and, if we fail
# any guards due to wobbles in shape, we recompile with *all* the wobbled shapes as being marked dynamic.
automatic_dynamic_shapes = True

# Typically, if you mark_dynamic a dimension, we will error if the dimension
# actually ended up getting specialized. This knob changes the behavior so
# that we don't error at all. This is helpful for our CI where I'm using a
# heuristic to mark batch dimensions as dynamic and the heuristic may get it
# wrong.
allow_ignore_mark_dynamic = False

# Set this to False to assume nn.Modules() contents are immutable (similar assumption as freezing)
guard_nn_modules = False

# This feature doesn't really work. We offer this flag for experimental
# purposes / if you want to help us build out support.
#
# torchdynamo has very limited support for tensor subclasses that implement
# __torch_function__. Our current support is limited to tensor subclasses
# that DO NOT store metadata on the tensor (in general, dynamo does not
# support Python code that stores extra attributes on tensors at present).
# If your tensor subclass purely changes function call behavior via
# __torch_function__, you can allow torchdynamo to trace into it by
# adding it to traceable_tensor_subclasses. We don't do any safety checks,
# so it is up to you to ensure that your subclass is well behaved. See also
# https://github.com/pytorch/torchdynamo/issues/1948
#
# We do NOT currently support __torch_dispatch__. The implementation is
# currently buggy, the main show stopper for nontrivial use is
# https://github.com/pytorch/torchdynamo/issues/1952
traceable_tensor_subclasses = set()

# Suppress errors in torch._dynamo.optimize, instead forcing a fallback to eager.
# This is a good way to get your model to work one way or another, but you may
# lose optimization opportunities this way. Devs, if your benchmark model is failing
# this way, you should figure out why instead of suppressing it.
suppress_errors = bool(os.environ.get("TORCHDYNAMO_SUPPRESS_ERRORS", False))

# Record and write an execution record of the current frame to a file
# if an exception is encountered
replay_record_enabled = os.environ.get("TORCH_COMPILE_DEBUG", "0") == "1"

# Rewrite assert statement in python with torch._assert
rewrite_assert_with_torch_assert = True

# Show a warning on every graph break
print_graph_breaks = False

# Show a warning for every specialization
print_specializations = False

# Simplify guards, summarizing static and dynamic constraints on dimensions.
# NOTE: This only has an effect when dynamic_shapes=True.
summarize_dim_constraints = False

# Disable dynamo
disable = os.environ.get("TORCH_COMPILE_DISABLE", False)

# If a PyTorch module is in this allowlist, torchdynamo will be allowed
# to inline objects from it or its children.
skipfiles_inline_module_allowlist = {
    torch.nn,
    torch.distributions,
    torch.testing,
    torch.ao.nn,
    torch._refs,
    torch._prims,
    torch._decomp,
    torch.utils._contextlib,
}

# If a string representing a PyTorch module is in this ignorelist,
# the `allowed_functions.is_allowed` function will not consider it
# when creating a list of PyTorch functions that will appear in
# FX IR.
allowed_functions_module_string_ignorelist = {
    "torch.distributions",
    "torch.testing",
    "torch._refs",
    "torch._prims",
    "torch._decomp",
}

# Debug Flag to try minifier at different stages. Possible values are {None, "aot", "dynamo"}
# None - Minifier is switched off
# dynamo - Runs minifier on the TorchDynamo produced graphs, if compilation fails
# aot - Runs minifier on the Aot Autograd produced graphs, if compilation fails
repro_after = os.environ.get("TORCHDYNAMO_REPRO_AFTER", None)

# Compiler compilation debug info
# 1: Dumps the original graph out to repro.py if compilation fails
# 2: Dumps a minifier_launcher.py if compilation fails.
# 3: Always dumps a minifier_launcher.py. Good for segfaults.
# 4: Dumps a minifier_launcher.py if the accuracy fails.
repro_level = int(os.environ.get("TORCHDYNAMO_REPRO_LEVEL", 2))

# By default, we try to detect accuracy failure by running both forward
# and backward of a torchdynamo produced graph (if you are using repro_after
# 'dynamo'). This setting forces us to only test the forward graph and
# not the backward graph. This can be helpful if you're trying to debug
# an inference only problem, but the minifier seems to be choking on the
# backwards step
# TODO: Detect this situation automatically so the user doesn't need
# to manually configure this
repro_forward_only = os.environ.get("TORCHDYNAMO_REPRO_FORWARD_ONLY") == "1"

# The tolerance we should use when testing if a compiled graph
# has diverged so that we should treat it as an accuracy failure
repro_tolerance = 1e-3

# If True, when testing if two models are the same, we will test them against
# a third fp64 reference and only report a problem if the RMSE relative to the
# fp64 is greater. However, this will use more memory; you may disable this
# if memory usage is too high.
same_two_models_use_fp64 = True

# Not all backends support scalars. Some calls on torch.Tensor (like .item()) return a scalar type.
# When this flag is set to False, we introduce a graph break instead of capturing.
# This requires dynamic_shapes to be True.
capture_scalar_outputs = False

# Not all backends support operators that have dynamic output shape (e.g.,
# nonzero, unique). When this flag is set to False, we introduce a graph
# break instead of capturing. This requires dynamic_shapes to be True.
# If you set this to True, you probably also want capture_scalar_outputs
# (these are separated for historical reasons).
capture_dynamic_output_shape_ops = False

# Should almost always be true in prod. This relaxes the requirement that cond's true_fn and
# false_fn produce code with identical guards.
enforce_cond_guards_match = True

# Automatically split model graph into pieces to match DDP bucket sizes
# to allow DDP comm/compute overlap. Disable to allow DDP models to
# run without graph-breaks, but also without comm/compute overlap.
# set torch._dynamo.config.log_level to INFO or DEBUG for more info
# about optimize_ddp behavior.
optimize_ddp = True

# Whether to skip guarding on FSDP-managed modules
skip_fsdp_guards = True

# Make dynamo skip guarding on hooks on nn modules
# Note: unsafe: if your model actually has hooks and you remove them, or doesn't and you add them,
# dynamo will not notice and will execute whichever version you first compiled.
skip_nnmodule_hook_guards = True

# If True, raises exception if TorchDynamo is called with a context manager
raise_on_ctx_manager_usage = True

# If True, raise when aot autograd is unsafe to use
raise_on_unsafe_aot_autograd = False

# Throw an error if backend changes without reset
raise_on_backend_change = False

# If true, error with a better message if we symbolically trace over a
# dynamo-optimized function. If false, silently suppress dynamo.
error_on_nested_fx_trace = True

# Disables graph breaking on rnn. YMMV with backends.
allow_rnn = False

# If true, error if we try to compile a function that has
# been seen before.
error_on_recompile = False

# reports why guards fail. Useful to identify the guards failing frequently and
# causing recompilations.
report_guard_failures = os.environ.get("TORCHDYNAMO_REPORT_GUARD_FAILURES") == "1"

# root folder of the project
base_dir = dirname(dirname(dirname(abspath(__file__))))

# trace through numpy ndarray as tensor and try to translate numpy function to torch function.
numpy_ndarray_as_tensor = False


def is_fbcode():
    return not hasattr(torch.version, "git_version")


DEBUG_DIR_VAR_NAME = "TORCH_COMPILE_DEBUG_DIR"

if DEBUG_DIR_VAR_NAME in os.environ:
    debug_dir_root = os.path.join(os.environ[DEBUG_DIR_VAR_NAME], "torch_compile_debug")
elif is_fbcode():
    debug_dir_root = os.path.join(tempfile.gettempdir(), "torch_compile_debug")
else:
    debug_dir_root = os.path.join(os.getcwd(), "torch_compile_debug")


_save_config_ignore = {
    "repro_after",
    "repro_level",
    # workaround: "cannot pickle PyCapsule"
    "constant_functions",
    # workaround: "cannot pickle module"
    "skipfiles_inline_module_allowlist",
}

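# Whether dynamo traces autograd.Function (inlining its forward and backward and adding
# a call to its apply to the graph) instead of graph breaking; see pytorch/pytorch#99483.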
capture_autograd_function = True

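# Tensor attributes and methods banned while verifying the soundness of an
# autograd.Function's backward in strict mode (the "config driven set of banned
# attrs and methods" referenced in pytorch/pytorch#99483).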
_autograd_backward_strict_mode_banned_ops = [
    "stride",
    "requires_grad",
    "storage_offset",
    "layout",
    "data",
]

_autograd_backward_strict_mode_banned_ops.extend(
    [name for name, _ in inspect.getmembers(torch.Tensor) if re.match(r"^is_.*", name)]
)


from .config_utils import install_config_module

install_config_module(sys.modules[__name__])
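For reference, a short usage sketch (not part of the file above): install_config_module is what lets the flags defined in this module be read and overridden at runtime as ordinary attributes of torch._dynamo.config, so typical usage looks roughly like the following. The specific values are arbitrary illustrations, not recommendations.

import torch
import torch._dynamo

# Override a couple of the flags defined above before compiling (illustrative values).
torch._dynamo.config.cache_size_limit = 128
torch._dynamo.config.verbose = True


@torch.compile(backend="eager")
def f(x):
    return torch.sin(x) + torch.cos(x)


print(f(torch.randn(8)))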