Enable xdoctest runner in CI for real this time (#83816)

Builds on #83317 and enables running the doctests. Just need to figure out what is causing the failures.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/83816
Approved by: https://github.com/ezyang, https://github.com/malfet
joncrall 2022-12-29 05:32:42 +00:00 committed by PyTorch MergeBot
parent fb4fc0dabe
commit ad782ff7df
90 changed files with 456 additions and 262 deletions

View File

@ -179,9 +179,9 @@ pytest-rerunfailures
#Pinned versions:
#test that import:
xdoctest==1.0.2
xdoctest==1.1.0
#Description: runs doctests in pytest
#Pinned versions: 1.0.2
#Pinned versions: 1.1.0
#test that import:
pygments==2.12.0

View File

@ -19,4 +19,4 @@ pytest-shard==0.1.2
scipy==1.9.0
sympy==1.11.1
unittest-xml-reporting<=3.2.0,>=2.0.0
xdoctest==1.0.2
xdoctest==1.1.0

View File

@ -4,8 +4,11 @@ This script simply runs the torch doctests via the xdoctest runner.
This must be run from the root of the torch repo, as it needs the path to the
torch source code.
"
This script is provided as a developer convenience. On CI, the doctests are
invoked via 'run_test.py'
"
# To simply list tests
# xdoctest -m torch --style=google list
# Reference: https://stackoverflow.com/questions/59895/bash-script-dir
@ -16,14 +19,10 @@ echo "TORCH_MODPATH = $TORCH_MODPATH"
if [[ ! -d "$TORCH_MODPATH" ]] ; then
echo "Could not find the path to the torch module"
else
# Next version of xdoctest will support environment variables that overload the defaults
export XDOCTEST_GLOBAL_EXEC="from torch import nn\nimport torch.nn.functional as F\nimport torch"
export XDOCTEST_OPTIONS="+IGNORE_WHITESPACE"
# Note: google won't catch numpy-style docstrings (a few exist) but it also won't fail
# on things not intended to be doctests.
export XDOCTEST_STYLE="google"
xdoctest "$TORCH_MODPATH" --style="$XDOCTEST_STYLE" --global-exec "$XDOCTEST_GLOBAL_EXEC" --options="$XDOCTEST_OPTIONS"
xdoctest torch "$TORCH_MODPATH" --style="$XDOCTEST_STYLE" --global-exec "$XDOCTEST_GLOBAL_EXEC" --options="$XDOCTEST_OPTIONS"
fi
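For reference, a minimal sketch of the equivalent programmatic invocation (assuming xdoctest >= 1.1.0 is installed); it mirrors the style, global-exec prelude, and options that the shell script passes on the command line:

```python
# Hedged sketch: run (or just list) the torch doctests from Python instead
# of the shell script above. The config keys mirror the values this commit
# adds to run_test.py.
import xdoctest

xdoctest.doctest_module(
    "torch",
    command="list",                     # 'all' actually executes the doctests
    config={
        "global_exec": "import torch",  # prelude run before every doctest
        "style": "google",
        "options": "+IGNORE_WHITESPACE",
    },
)
```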

View File

@ -659,10 +659,9 @@ def run_doctests(test_module, test_directory, options):
import pathlib
pkgpath = pathlib.Path(torch.__file__).parent
#
enabled = {
# TODO: expose these options to the user
# Temporary disable all feature-conditional tests
# For now disable all feature-conditional tests
# 'lapack': 'auto',
# 'cuda': 'auto',
# 'cuda1': 'auto',
@ -671,6 +670,9 @@ def run_doctests(test_module, test_directory, options):
'cuda': 0,
'cuda1': 0,
'qengine': 0,
'autograd_profiler': 0,
'cpp_ext': 0,
'monitor': 0,
}
# Resolve "auto" based on a test to determine if the feature is available.
@ -707,13 +709,34 @@ def run_doctests(test_module, test_directory, options):
if enabled['qengine']:
os.environ['TORCH_DOCTEST_QENGINE'] = '1'
if enabled['autograd_profiler']:
os.environ['TORCH_DOCTEST_AUTOGRAD_PROFILER'] = '1'
if enabled['cpp_ext']:
os.environ['TORCH_DOCTEST_CPP_EXT'] = '1'
if enabled['monitor']:
os.environ['TORCH_DOCTEST_MONITOR'] = '1'
if 0:
# TODO: could try to enable some of these
os.environ['TORCH_DOCTEST_QUANTIZED_DYNAMIC'] = '1'
os.environ['TORCH_DOCTEST_ANOMOLY'] = '1'
os.environ['TORCH_DOCTEST_AUTOGRAD'] = '1'
os.environ['TORCH_DOCTEST_HUB'] = '1'
os.environ['TORCH_DOCTEST_DATALOADER'] = '1'
os.environ['TORCH_DOCTEST_ONNX'] = '1'
os.environ['TORCH_DOCTEST_FUTURES'] = '1'
pkgpath = os.path.dirname(torch.__file__)
xdoctest_config = {
'global_exec': r'\n'.join([
'from torch import nn',
'import torch.nn.functional as F',
'import torch',
]),
'analysis': 'static', # set to "auto" to test doctests in compiled modules
'style': 'google',
'options': '+IGNORE_WHITESPACE',
}
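The environment flags set above pair with ``REQUIRES(env:...)`` directives inside the docstrings. A hedged, self-contained sketch of that gating (the function name ``fn`` is illustrative, and it assumes ``xdoctest.doctest_callable`` is available in the installed version):

```python
# A doctest guarded by a REQUIRES(env:...) directive only runs when the
# named variable is set to '1'; otherwise xdoctest skips it.
import os
import xdoctest

def fn():
    """
    Example:
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
        >>> 1 + 1
        2
    """

os.environ["TORCH_DOCTEST_CUDA"] = "1"  # unset or '0' would skip the test
xdoctest.doctest_callable(fn)
```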
@ -1016,7 +1039,7 @@ def parse_args():
)
parser.add_argument(
"--xdoctest-command",
default='list',
default='all',
help=(
"Control the specific doctest action. "
"Use 'list' to simply parse doctests and check syntax. "

View File

@ -627,10 +627,10 @@ def use_deterministic_algorithms(mode, *, warn_only=False):
Example::
>>> # xdoctest: +SKIP
>>> torch.use_deterministic_algorithms(True)
# Forward mode nondeterministic error
>>> # xdoctest: +SKIP
>>> torch.randn(10, device='cuda').kthvalue(0)
...
RuntimeError: kthvalue CUDA does not have a deterministic implementation...

View File

@ -251,6 +251,7 @@ def vjp(func: Callable, *primals, has_aux: bool = False):
Case 2: Using ``vjp`` inside ``torch.no_grad`` context manager:
>>> # xdoctest: +SKIP(failing)
>>> with torch.no_grad():
>>> vjp(f)(x)
@ -1286,6 +1287,7 @@ def grad(func: Callable, argnums: argnums_t = 0, has_aux: bool = False) -> Calla
Example of using ``grad``:
>>> # xdoctest: +SKIP
>>> from torch.func import grad
>>> x = torch.randn([])
>>> cos_x = grad(lambda x: torch.sin(x))(x)
@ -1297,6 +1299,7 @@ def grad(func: Callable, argnums: argnums_t = 0, has_aux: bool = False) -> Calla
When composed with ``vmap``, ``grad`` can be used to compute per-sample-gradients:
>>> # xdoctest: +SKIP
>>> from torch.func import grad, vmap
>>> batch_size, feature_size = 3, 5
>>>
@ -1317,6 +1320,7 @@ def grad(func: Callable, argnums: argnums_t = 0, has_aux: bool = False) -> Calla
Example of using ``grad`` with ``has_aux`` and ``argnums``:
>>> # xdoctest: +SKIP
>>> from torch.func import grad
>>> def my_loss_func(y, y_pred):
>>> loss_per_sample = (0.5 * y_pred - y) ** 2
@ -1327,13 +1331,14 @@ def grad(func: Callable, argnums: argnums_t = 0, has_aux: bool = False) -> Calla
>>> y_true = torch.rand(4)
>>> y_preds = torch.rand(4, requires_grad=True)
>>> out = fn(y_true, y_preds)
>>> > output is ((grads w.r.t y_true, grads w.r.t y_preds), (y_pred, loss_per_sample))
>>> # > output is ((grads w.r.t y_true, grads w.r.t y_preds), (y_pred, loss_per_sample))
.. note::
Using PyTorch ``torch.no_grad`` together with ``grad``.
Case 1: Using ``torch.no_grad`` inside a function:
>>> # xdoctest: +SKIP
>>> def f(x):
>>> with torch.no_grad():
>>> c = x ** 2
@ -1343,6 +1348,7 @@ def grad(func: Callable, argnums: argnums_t = 0, has_aux: bool = False) -> Calla
Case 2: Using ``grad`` inside ``torch.no_grad`` context manager:
>>> # xdoctest: +SKIP
>>> with torch.no_grad():
>>> grad(f)(x)
@ -1433,11 +1439,12 @@ def functionalize(func: Callable, *, remove: str = 'mutations') -> Callable:
Example::
>>> # xdoctest: +SKIP
>>> import torch
>>> from torch.fx.experimental.proxy_tensor import make_fx
>>> from torch.func import functionalize
>>>
>>> A function that uses mutations and views, but only on intermediate tensors.
>>> # A function that uses mutations and views, but only on intermediate tensors.
>>> def f(a):
... b = a + 1
... c = b.view(-1)
@ -1490,17 +1497,17 @@ def functionalize(func: Callable, *, remove: str = 'mutations') -> Callable:
return view_copy_1
>>> A function that mutates its input tensor
>>> # A function that mutates its input tensor
>>> def f(a):
... b = a.view(-1)
... b.add_(1)
... return a
...
>>> f_no_mutations_and_views_traced = make_fx(functionalize(f, remove='mutations_and_views'))(inpt)
>>>
>>> All mutations and views have been removed,
>>> but there is an extra copy_ in the graph to correctly apply the mutation to the input
>>> after the function has completed.
>>> #
>>> # All mutations and views have been removed,
>>> # but there is an extra copy_ in the graph to correctly apply the mutation to the input
>>> # after the function has completed.
>>> print(f_no_mutations_and_views_traced.code)

View File

@ -69,6 +69,7 @@ def minifier(fail_f: fx.GraphModule, inps, module_fails, dump_state: Callable =
2. Delta Debugging: Tries replacing half of the graph with inputs. If that fails,
tries replacing a quarter of the graph, etc.
>>> # xdoctest: +SKIP(failing)
>>> failing_function = fx.symbolic_trace(f)
>>> minimize(failing_function, [torch.randn(5)], lambda fx_g, inps: fx_g(*inps))

View File

@ -122,10 +122,12 @@ def update_names(tensor, names, rename_map, inplace):
For example,
```
>>> # xdoctest: +SKIP
>>> x = torch.empty(2, 3, 5, 7, names=('N', 'C', 'H', 'W'))
>>> x.rename('...', 'height', 'width').names
('N', 'C', 'height', 'width')
>>> # xdoctest: +SKIP
>>> x.rename('batch', '...', 'width').names
('batch', 'C', 'H', 'width')
@ -136,6 +138,7 @@ def update_names(tensor, names, rename_map, inplace):
For example,
```
>>> # xdoctest: +SKIP
>>> x = torch.empty(2, 3, 5, 7, names=('N', 'C', 'H', 'W'))
>>> x.rename(W='width', H='height').names
('N', 'C', 'height', 'width')

View File

@ -1496,6 +1496,7 @@ def compute_required_storage_length(
>>> compute_required_storage_length(t.shape, t.stride(), t.storage_offset())
200
>>> # xdoctest: +SKIP(failing)
>>> t2 = torch.empty_strided((1, 2, 3), (5, 7, 11))
>>> size = compute_required_storage_length(t2.shape, t2.stride(), t2.storage_offset())
>>> size == t.storage().size()

View File

@ -215,7 +215,6 @@ def _vector_str(self, indent, summarize, formatter1, formatter2=None):
elements_per_line = max(
1, int(math.floor((PRINT_OPTS.linewidth - indent) / (element_length)))
)
# char_per_line = element_length * elements_per_line # unused
def _val_formatter(val, formatter1=formatter1, formatter2=formatter2):
if formatter2 is not None:

View File

@ -9,6 +9,7 @@ from torch.utils._pytree import _broadcast_to_and_flatten, tree_flatten, tree_un
in_dims_t = Union[int, Tuple]
out_dims_t = Union[int, Tuple[int, ...]]
# Checks that all args-to-be-batched have the same batch dim size
def _validate_and_get_batch_size(
flat_in_dims: List[Optional[int]], flat_args: List
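The hunk truncates this helper; a hypothetical completion of the check described in the comment above (a sketch, not the verbatim torch implementation):

```python
# Hypothetical sketch: every argument with a non-None in_dim must share the
# same size along that dimension; return that common batch size.
from typing import List, Optional

import torch

def _sketch_validate_and_get_batch_size(
    flat_in_dims: List[Optional[int]], flat_args: List[torch.Tensor]
) -> int:
    sizes = [arg.size(dim) for dim, arg in zip(flat_in_dims, flat_args) if dim is not None]
    if any(size != sizes[0] for size in sizes):
        raise ValueError(f"vmap: got inconsistent batch sizes {sizes}")
    return sizes[0]
```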

View File

@ -19,9 +19,9 @@ class LinearReLU(nnqd.Linear):
Examples::
>>> # xdoctest: +SKIP
>>> m = nn.intrinsic.quantized.dynamic.LinearReLU(20, 30)
>>> input = torch.randn(128, 20)
>>> # xdoctest: +SKIP
>>> output = m(input)
>>> print(output.size())
torch.Size([128, 30])

View File

@ -56,6 +56,7 @@ class LinearLeakyReLU(nnq.Linear):
Same as torch.nn.quantized.Linear
+ negative_slope
Examples::
>>> # xdoctest: +SKIP
>>> m = nn.intrinsic.LinearLeakyReLU(20, 30, 0.01)
>>> input = torch.randn(128, 20)
>>> output = m(input)

View File

@ -15,6 +15,7 @@ import warnings
__all__ = ['Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d']
class Conv1d(nnq.Conv1d):
r"""A dynamically quantized conv module with floating point tensors as inputs and outputs.
@ -31,9 +32,9 @@ class Conv1d(nnq.Conv1d):
Examples::
>>> # xdoctest: +SKIP
>>> m = nn.quantized.dynamic.Conv1d(16, 33, 3, stride=2)
>>> input = torch.randn(20, 16, 100)
>>> # xdoctest: +SKIP
>>> output = m(input)
"""
@ -102,6 +103,7 @@ class Conv2d(nnq.Conv2d):
Examples::
>>> # xdoctest: +SKIP
>>> # With square kernels and equal stride
>>> m = nn.quantized.dynamic.Conv2d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
@ -109,7 +111,6 @@ class Conv2d(nnq.Conv2d):
>>> # non-square kernels and unequal stride and with padding and dilation
>>> m = nn.quantized.dynamic.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
>>> input = torch.randn(20, 16, 50, 100)
>>> # xdoctest: +SKIP
>>> output = m(input)
"""
@ -167,6 +168,7 @@ class Conv3d(nnq.Conv3d):
Examples::
>>> # xdoctest: +SKIP
>>> # With square kernels and equal stride
>>> m = nn.quantized.dynamic.Conv3d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
@ -174,7 +176,6 @@ class Conv3d(nnq.Conv3d):
>>> # non-square kernels and unequal stride and with padding and dilation
>>> m = nn.quantized.dynamic.Conv3d(16, 33, (3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2), dilation=(1, 2, 2))
>>> input = torch.randn(20, 16, 56, 56, 56)
>>> # xdoctest: +SKIP
>>> output = m(input)
"""
@ -233,8 +234,8 @@ class ConvTranspose1d(nnq.ConvTranspose1d):
Examples::
>>> # With square kernels and equal stride
>>> # xdoctest: +SKIP
>>> # With square kernels and equal stride
>>> m = nndq.ConvTranspose1d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
>>> m = nndq.ConvTranspose1d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
@ -294,11 +295,11 @@ class ConvTranspose2d(nnq.ConvTranspose2d):
Examples::
>>> # xdoctest: +SKIP
>>> # With square kernels and equal stride
>>> m = nnq.ConvTranspose2d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
>>> m = nnq.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
>>> # xdoctest: +SKIP
>>> output = m(input)
>>> # exact output size can be also specified as an argument
>>> downsample = nnq.Conv2d(16, 16, 3, stride=2, padding=1)
@ -355,11 +356,11 @@ class ConvTranspose3d(nnq.ConvTranspose3d):
Examples::
>>> # xdoctest: +SKIP
>>> # With cubic kernels and equal stride
>>> m = nnq.ConvTranspose3d(16, 33, 3, stride=2)
>>> # non-cubic kernels and unequal stride and with padding
>>> m = nnq.ConvTranspose3d(16, 33, (3, 3, 5), stride=(2, 1, 1), padding=(4, 2, 2))
>>> # xdoctest: +SKIP
>>> output = m(input)
>>> # exact output size can be also specified as an argument
>>> downsample = nnq.Conv3d(16, 16, 3, stride=2, padding=1)

View File

@ -7,6 +7,7 @@ __all__ = [
"Linear",
]
class Linear(nnq.Linear):
r"""
A dynamic quantized linear module with floating point tensors as inputs and outputs.
@ -25,9 +26,9 @@ class Linear(nnq.Linear):
Examples::
>>> # xdoctest: +SKIP
>>> m = nn.quantized.dynamic.Linear(20, 30)
>>> input = torch.randn(128, 20)
>>> # xdoctest: +SKIP
>>> output = m(input)
>>> print(output.size())
torch.Size([128, 30])

View File

@ -11,13 +11,16 @@ from torch.ao.nn.quantized.modules.utils import _quantize_weight
__all__ = ['pack_weight_bias', 'PackedParameter', 'RNNBase', 'LSTM', 'GRU', 'RNNCellBase', 'RNNCell', 'LSTMCell',
'GRUCell', "apply_permutation"]
def _apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
return tensor.index_select(dim, permutation)
def apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
warnings.warn("apply_permutation is deprecated, please use tensor.index_select(dim, permutation) instead")
return _apply_permutation(tensor, permutation, dim)
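The deprecation warning points callers at ``index_select``; a small usage sketch of the suggested migration:

```python
# Hedged sketch: the deprecated apply_permutation(t, perm, dim) is just an
# index_select along `dim`.
import torch

t = torch.arange(6.0).reshape(2, 3)
perm = torch.tensor([2, 0, 1])
out = t.index_select(1, perm)  # what apply_permutation(t, perm, dim=1) returned
```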
def pack_weight_bias(qweight, bias, dtype):
if dtype == torch.qint8:
@ -39,6 +42,7 @@ def pack_weight_bias(qweight, bias, dtype):
return packed_weight
class PackedParameter(torch.nn.Module):
def __init__(self, param):
super(PackedParameter, self).__init__()
@ -54,6 +58,7 @@ class PackedParameter(torch.nn.Module):
super(PackedParameter, self)._load_from_state_dict(state_dict, prefix, local_metadata, False,
missing_keys, unexpected_keys, error_msgs)
class RNNBase(torch.nn.Module):
_FLOAT_MODULE = nn.RNNBase
@ -347,7 +352,6 @@ class RNNBase(torch.nn.Module):
return qRNNBase
def _weight_bias(self):
# Returns a dict of weights and biases
weight_bias_dict: Dict[str, Dict] = {'weight' : {}, 'bias' : {}}
@ -376,6 +380,7 @@ class RNNBase(torch.nn.Module):
def get_bias(self):
return self._weight_bias()['bias']
class LSTM(RNNBase):
r"""
A dynamic quantized LSTM module with floating point tensors as inputs and outputs.
@ -384,6 +389,7 @@ class LSTM(RNNBase):
Examples::
>>> # xdoctest: +SKIP
>>> rnn = nn.LSTM(10, 20, 2)
>>> input = torch.randn(5, 3, 10)
>>> h0 = torch.randn(2, 3, 20)
@ -610,6 +616,7 @@ class GRU(RNNBase):
Examples::
>>> # xdoctest: +SKIP
>>> rnn = nn.GRU(10, 20, 2)
>>> input = torch.randn(5, 3, 10)
>>> h0 = torch.randn(2, 3, 20)
@ -922,6 +929,7 @@ class RNNCellBase(torch.nn.Module):
super(RNNCellBase, self)._load_from_state_dict(state_dict, prefix, local_metadata, False,
missing_keys, unexpected_keys, error_msgs)
class RNNCell(RNNCellBase):
r"""An Elman RNN cell with tanh or ReLU non-linearity.
A dynamic quantized RNNCell module with floating point tensors as inputs and outputs.
@ -930,6 +938,7 @@ class RNNCell(RNNCellBase):
Examples::
>>> # xdoctest: +SKIP
>>> rnn = nn.RNNCell(10, 20)
>>> input = torch.randn(6, 3, 10)
>>> hx = torch.randn(3, 20)
@ -982,6 +991,7 @@ class LSTMCell(RNNCellBase):
Examples::
>>> # xdoctest: +SKIP
>>> rnn = nn.LSTMCell(10, 20)
>>> input = torch.randn(6, 3, 10)
>>> hx = torch.randn(3, 20)
@ -1014,6 +1024,7 @@ class LSTMCell(RNNCellBase):
def from_float(cls, mod):
return super(LSTMCell, cls).from_float(mod)
class GRUCell(RNNCellBase):
r"""A gated recurrent unit (GRU) cell
@ -1023,6 +1034,7 @@ class GRUCell(RNNCellBase):
Examples::
>>> # xdoctest: +SKIP
>>> rnn = nn.GRUCell(10, 20)
>>> input = torch.randn(6, 3, 10)
>>> hx = torch.randn(3, 20)

View File

@ -164,6 +164,7 @@ def conv1d(input, weight, bias,
Examples::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
>>> from torch.ao.nn.quantized import functional as qF
>>> filters = torch.randn(33, 16, 3, dtype=torch.float)
>>> inputs = torch.randn(20, 16, 50, dtype=torch.float)
@ -223,6 +224,7 @@ def conv2d(input, weight, bias,
Examples::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
>>> from torch.ao.nn.quantized import functional as qF
>>> filters = torch.randn(8, 4, 3, 3, dtype=torch.float)
>>> inputs = torch.randn(1, 4, 5, 5, dtype=torch.float)
@ -283,6 +285,7 @@ def conv3d(input, weight, bias, stride=1, padding=0, dilation=1, groups=1,
Examples::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
>>> from torch.ao.nn.quantized import functional as qF
>>> filters = torch.randn(8, 4, 3, 3, 3, dtype=torch.float)
>>> inputs = torch.randn(1, 4, 5, 5, 5, dtype=torch.float)

View File

@ -293,6 +293,7 @@ class Conv1d(_ConvNd):
Examples::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
>>> m = nn.quantized.Conv1d(16, 33, 3, stride=2)
>>> input = torch.randn(20, 16, 100)
>>> # quantize input to quint8
@ -400,6 +401,7 @@ class Conv2d(_ConvNd):
Examples::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
>>> # With square kernels and equal stride
>>> m = nn.quantized.Conv2d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding
@ -498,6 +500,7 @@ class Conv3d(_ConvNd):
Examples::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
>>> # With square kernels and equal stride
>>> m = nn.quantized.Conv3d(16, 33, 3, stride=2)
>>> # non-square kernels and unequal stride and with padding

View File

@ -115,6 +115,7 @@ class Linear(WeightedQuantizedModule):
Examples::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
>>> m = nn.quantized.Linear(20, 30)
>>> input = torch.randn(128, 20)
>>> # xdoctest: +SKIP

View File

@ -88,6 +88,7 @@ class DTypeConfig:
Example usage::
>>> # xdoctest: +SKIP(failing)
>>> dtype_config1 = DTypeConfig(
... input_dtype=torch.quint8,
... output_dtype=torch.quint8,

View File

@ -77,6 +77,7 @@ def _fuse_linear_bn_leaky_relu(is_qat, linear, bn, leaky_relu):
bn: BatchNorm1d instance that needs to be fused with the linear layer
leaky_relu: LeakyReLU instance that needs to be fused with the linear layer
Examples::
>>> # xdoctest: +SKIP(failing)
>>> m1 = nn.Linear(20, 10)
>>> b1 = nn.BatchNorm1d(10)
>>> lr = nn.LeakyReLU(0.01)

View File

@ -5,6 +5,7 @@ from typing import Any
__all__ = ["detect_anomaly", "set_detect_anomaly"]
class detect_anomaly(object):
r"""Context-manager that enable anomaly detection for the autograd engine.
@ -22,6 +23,7 @@ class detect_anomaly(object):
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_ANOMOLY)
>>> import torch
>>> from torch import autograd
>>> class MyFunc(autograd.Function):

View File

@ -11,6 +11,7 @@ __all__ = ["UnpackedDualTensor", "enter_dual_level", "exit_dual_level", "make_du
# Global variable used to make the python API simpler to use
_current_level = -1
def enter_dual_level():
r"""Function that can be used to enter a new forward grad level.
This level can be used to make and unpack dual Tensors to compute
@ -27,6 +28,7 @@ def enter_dual_level():
_current_level = new_level
return new_level
def exit_dual_level(*, level=None):
r"""Function that can be used to exit a forward grad level.
This function deletes all the gradients associated with this
@ -44,6 +46,7 @@ def exit_dual_level(*, level=None):
torch._C._exit_dual_level(level=level)
_current_level = level - 1
def make_dual(tensor, tangent, *, level=None):
r"""Associates a tensor value with a forward gradient, the tangent, to create a
"dual tensor", which is used to compute forward AD gradients.
@ -104,11 +107,13 @@ def make_dual(tensor, tangent, *, level=None):
_UnpackedDualTensor = namedtuple('_UnpackedDualTensor', ['primal', 'tangent'])
class UnpackedDualTensor(_UnpackedDualTensor):
r"""Namedtuple returned by :func:`unpack_dual` containing the primal and tangent components of the dual tensor.
See :func:`unpack_dual` for more details."""
pass
def unpack_dual(tensor, *, level=None):
r"""Unpacks a "dual tensor" to get both its Tensor value and its forward AD gradient.
The result is a namedtuple ``(primal, tangent)`` where ``primal`` is a view of
@ -139,6 +144,7 @@ def unpack_dual(tensor, *, level=None):
return UnpackedDualTensor(primal, dual)
class dual_level(_DecoratorContextManager):
r"""Context-manager that enables forward AD. All forward AD computation must
be performed in a ``dual_level`` context.
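Taken together, ``make_dual``, ``unpack_dual``, and ``dual_level`` form the forward-AD workflow; a minimal sketch using the public ``torch.autograd.forward_ad`` API:

```python
# Pack a primal/tangent pair into a dual tensor, compute, then unpack the
# forward-mode gradient (the JVP).
import torch
import torch.autograd.forward_ad as fwAD

primal = torch.tensor([0.0, 1.0, 2.0])
tangent = torch.ones(3)
with fwAD.dual_level():
    dual = fwAD.make_dual(primal, tangent)
    out = dual.sin()
    primal_out, tangent_out = fwAD.unpack_dual(out)  # tangent_out == primal.cos()
```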

View File

@ -48,6 +48,7 @@ class FunctionCtx(object):
See :ref:`extending-autograd` for more details on how to use this method.
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> class Func(Function):
>>> @staticmethod
>>> def forward(ctx, x: torch.Tensor, y: torch.Tensor, z: int):
@ -139,6 +140,7 @@ class FunctionCtx(object):
modification.
Examples::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> class Inplace(Function):
>>> @staticmethod
>>> def forward(ctx, x):
@ -210,6 +212,7 @@ class FunctionCtx(object):
prior to calling the :func:`backward` and :func:`jvp` methods.
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> class SimpleFunc(Function):
>>> @staticmethod
>>> def forward(ctx, x):
@ -382,6 +385,7 @@ class Function(_SingleLevelFunction):
Examples::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> class Exp(Function):
>>> @staticmethod
>>> def forward(ctx, i):

View File

@ -7,6 +7,7 @@ __all__ = ["vjp", "jvp", "jacobian", "hessian", "hvp", "vhp"]
# Utility functions
def _as_tuple_nocheck(x):
if isinstance(x, tuple):
return x
@ -15,6 +16,7 @@ def _as_tuple_nocheck(x):
else:
return x,
def _as_tuple(inp, arg_name=None, fn_name=None):
# Ensures that inp is a tuple of Tensors
# Returns whether or not the original inp was a tuple and the tupled version of the input
@ -37,6 +39,7 @@ def _as_tuple(inp, arg_name=None, fn_name=None):
return is_inp_tuple, inp
def _tuple_postprocess(res, to_unpack):
# Unpacks a potentially nested tuple of Tensors
# to_unpack should be a single boolean or a tuple of two booleans.
@ -54,6 +57,7 @@ def _tuple_postprocess(res, to_unpack):
res = res[0]
return res
def _grad_preprocess(inputs, create_graph, need_graph):
# Preprocess the inputs to make sure they require gradient
# inputs is a tuple of Tensors to preprocess
@ -88,6 +92,7 @@ def _grad_postprocess(inputs, create_graph):
else:
return tuple(_grad_postprocess(inp, create_graph) for inp in inputs)
def _validate_v(v, other, is_other_tuple):
# This assumes that other is the correct shape, and v should match
# Both are assumed to be tuples of Tensors
@ -138,6 +143,7 @@ def _check_requires_grad(inputs, input_type, strict):
" The outputs must be computed in a differentiable manner from the input"
" when running in strict mode.".format(i))
def _autograd_grad(outputs, inputs, grad_outputs=None, create_graph=False, retain_graph=None, is_grads_batched=False):
# Version of autograd.grad that accepts `None` in outputs and does not compute gradients for them.
# This has the extra constraint that inputs has to be a tuple
@ -162,6 +168,7 @@ def _autograd_grad(outputs, inputs, grad_outputs=None, create_graph=False, retai
create_graph=create_graph, retain_graph=retain_graph,
is_grads_batched=is_grads_batched)
def _fill_in_zeros(grads, refs, strict, create_graph, stage):
# Used to detect None in the grads and, depending on the flags, either replace them
# with Tensors full of 0s of the appropriate size based on the refs, or raise an error.
@ -204,6 +211,7 @@ def _fill_in_zeros(grads, refs, strict, create_graph, stage):
return res
# Public API
def vjp(func, inputs, v=None, create_graph=False, strict=False):
@ -238,6 +246,7 @@ def vjp(func, inputs, v=None, create_graph=False, strict=False):
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> def exp_reducer(x):
... return x.exp().sum(dim=1)
>>> inputs = torch.rand(4, 4)
@ -335,6 +344,7 @@ def jvp(func, inputs, v=None, create_graph=False, strict=False):
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> def exp_reducer(x):
... return x.exp().sum(dim=1)
>>> inputs = torch.rand(4, 4)
@ -536,6 +546,7 @@ def jacobian(func, inputs, create_graph=False, strict=False, vectorize=False, st
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> def exp_reducer(x):
... return x.exp().sum(dim=1)
>>> inputs = torch.rand(2, 2)
@ -698,6 +709,7 @@ def jacobian(func, inputs, create_graph=False, strict=False, vectorize=False, st
return _tuple_postprocess(jacobian, (is_outputs_tuple, is_inputs_tuple))
def hessian(func, inputs, create_graph=False, strict=False, vectorize=False, outer_jacobian_strategy="reverse-mode"):
r"""Function that computes the Hessian of a given scalar function.
@ -746,6 +758,7 @@ def hessian(func, inputs, create_graph=False, strict=False, vectorize=False, out
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> def pow_reducer(x):
... return x.pow(3).sum()
>>> inputs = torch.rand(2, 2)
@ -849,6 +862,7 @@ def vhp(func, inputs, v=None, create_graph=False, strict=False):
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> def pow_reducer(x):
... return x.pow(3).sum()
>>> inputs = torch.rand(2, 2)
@ -939,6 +953,7 @@ def hvp(func, inputs, v=None, create_graph=False, strict=False):
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> def pow_reducer(x):
... return x.pow(3).sum()
>>> inputs = torch.rand(2, 2)

View File

@ -270,6 +270,7 @@ class inference_mode(_DecoratorContextManager):
mode (bool): Flag whether to enable or disable inference mode
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> import torch
>>> x = torch.ones(1, 2, 3, requires_grad=True)
>>> with torch.inference_mode():

View File

@ -48,6 +48,7 @@ class saved_tensors_hooks():
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> def pack_hook(x):
... print("Packing", x)
... return x
@ -107,6 +108,7 @@ class save_on_cpu(saved_tensors_hooks):
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
>>> a = torch.randn(5, requires_grad=True, device="cuda")
>>> b = torch.randn(5, requires_grad=True, device="cuda")
>>> c = torch.randn(5, requires_grad=True, device="cuda")
@ -160,6 +162,7 @@ def disable_saved_tensors_hooks(error_message):
Example::
>>> # xdoctest: +SKIP(failing)
>>> message = "saved tensors default hooks are disabled"
>>> with torch.autograd.graph.disable_saved_tensors_hooks(message):
... # Raises RuntimeError: saved tensors default hooks are disabled

View File

@ -121,6 +121,7 @@ class profile(object):
Example:
>>> # xdoctest: +SKIP
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD_PROFILER)
>>> x = torch.randn((1, 1), requires_grad=True)
>>> with torch.autograd.profiler.profile() as prof:
>>> for _ in range(100): # any normal python code, really!
@ -453,6 +454,7 @@ class record_function(_ContextDecorator):
non-distributed cases.
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD_PROFILER)
>>> x = torch.randn((1, 1), requires_grad=True)
>>> with torch.autograd.profiler.profile() as prof:
... y = x ** 2
@ -578,6 +580,7 @@ class emit_itt(object):
Example:
>>> # xdoctest: +SKIP("Undefined variables")
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD_PROFILER)
>>> with torch.autograd.profiler.emit_itt():
... model(x)
@ -646,6 +649,7 @@ class emit_nvtx(object):
Example:
>>> # xdoctest: +SKIP("undefined variables")
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD_PROFILER)
>>> with torch.cuda.profiler.profile():
... model(x) # Warmup CUDA memory allocator and profiler
... with torch.autograd.profiler.emit_nvtx():

View File

@ -6,6 +6,7 @@ import re
__all__ : List[str] = []
class _CodeParser:
def __init__(self, code_string: str):
optional_ws = r"\s*"
@ -37,6 +38,7 @@ class _CodeParser:
self.function_params = result["function_params"]
self.function_body = result["function_body"]
class _JittedFunction:
def __init__(self, code_string: str, return_by_ref: bool, num_outputs: int, **kwargs):
self.code_string = code_string
@ -135,6 +137,7 @@ def _create_jit_fn(code_string: str, **kwargs) -> Callable:
return _JittedFunction(code_string, return_by_ref=False, num_outputs=1, **kwargs)
def _create_multi_output_jit_fn(code_string: str, num_outputs: int, **kwargs) -> Callable:
"""
Create a jiterator-generated cuda kernel for an elementwise op that supports returning one or more outputs.
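A hedged sketch of the jiterator API this docstring introduces (requires a CUDA build; the kernel name ``my_op`` is illustrative):

```python
# _create_jit_fn compiles an elementwise CUDA kernel from a C++ template
# string; compilation happens lazily on the first call.
import torch
from torch.cuda.jiterator import _create_jit_fn

code = "template <typename T> T my_op(T x, T y) { return x + T(2) * y; }"
fn = _create_jit_fn(code)

a = torch.rand(3, device="cuda")
b = torch.rand(3, device="cuda")
out = fn(a, b)  # elementwise x + 2*y
```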

View File

@ -825,6 +825,7 @@ class DistributedDataParallel(Module):
Example::
Below is an example of a noop hook that returns the same tensor.
>>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d)
>>> def noop(state: object, bucket: dist.GradBucket) -> torch.futures.Future[torch.Tensor]:
>>> fut = torch.futures.Future()
>>> fut.set_result(bucket.buffer())
@ -837,6 +838,7 @@ class DistributedDataParallel(Module):
Below is an example of a Parallel SGD algorithm where gradients are encoded before
allreduce, and then decoded after allreduce.
>>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d)
>>> def encode_and_decode(state: object, bucket: dist.GradBucket) -> torch.futures.Future[torch.Tensor]:
>>> encoded_tensor = encode(bucket.buffer()) # encode gradients
>>> fut = torch.distributed.all_reduce(encoded_tensor).get_future()

View File

@ -195,6 +195,7 @@ def checkpoint(module: nn.Module, *, use_reentrant: bool = True) -> nn.Module:
autograd.
Example::
>>> # xdoctest: +SKIP
>>> import torch.nn as nn
>>>
>>> class MyModel(nn.Module):

View File

@ -41,6 +41,7 @@ def contract(state_cls: Type[_State] = _State):
``func.state(module)``.
Example::
>>> # xdoctest: +SKIP
>>> import torch.nn as nn
>>>
>>> class MyModel(nn.Module):

View File

@ -18,6 +18,7 @@ def replicate(
module (torch.nn.Module): module to replicate
Example::
>>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d)
>>> module = nn.Linear(3, 3)
>>> replicate(module)
"""

View File

@ -427,6 +427,7 @@ def custom_sharded_op_impl(func):
parameters, the function provided will be invoked for that operator.
Example::
>>> # xdoctest: +SKIP
>>> @custom_sharded_op_impl(torch.nn.functional.linear)
>>> def my_custom_sharded_linear(types, args, kwargs, process_group):
>>> ...

View File

@ -805,9 +805,9 @@ class ShardedTensor(ShardedTensorBase):
tensor stored in the current rank.
Examples:
>>> # xdoctest: +SKIP
>>> # All tensors below are of torch.int64 type.
>>> # We have 2 process groups, 2 ranks.
>>> # xdoctest: +SKIP
>>> tensor = torch.arange(2, dtype=torch.int64) + 1 + 2 * rank
>>> local_tensor = torch.unsqueeze(torch.cat([tensor, tensor + 2]))
>>> local_tensor
@ -955,8 +955,8 @@ class ShardedTensor(ShardedTensorBase):
A :class:`ShardedTensor` object whose local shards are resharded.
Examples:
>>> # We have 2 process groups, 2 ranks.
>>> # xdoctest: +SKIP
>>> # We have 2 process groups, 2 ranks.
>>> tensor = torch.arange(4, dtype=torch.int64) + 1 + 2 * rank
>>> tensor = torch.stack([tensor, tensor])
>>> tensor

View File

@ -36,6 +36,7 @@ class ShardingPlan(object):
Suppose we want to shard a module with two linear layers and then run it with DDP; we also
want to convert the output of the second linear layer back to DDP. We can do it as follows:
>>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d)
>>> class MyModule(nn.Module):
>>> def __init__(self):
>>> super().__init__()

View File

@ -54,6 +54,7 @@ class MemoryTracker:
Example usage:
>>> # xdoctest: +SKIP(failing)
>>> net.cuda()
>>> input = input.cuda()

View File

@ -25,6 +25,7 @@ if is_available():
DistAutogradContext,
)
class context(object):
'''
Context object to wrap forward and backward passes when using
@ -35,8 +36,8 @@ class context(object):
autograd pass.
Example::
>>> import torch.distributed.autograd as dist_autograd
>>> # xdoctest: +SKIP
>>> import torch.distributed.autograd as dist_autograd
>>> with dist_autograd.context() as context_id:
>>> t1 = torch.rand((3, 3), requires_grad=True)
>>> t2 = torch.rand((3, 3), requires_grad=True)

View File

@ -202,6 +202,7 @@ def load_sharded_optimizer_state_dict(
"""
Loads a state_dict to be used in conjunction with FSDP sharded optimizer state.
This is the currently recommended way to checkpoint FSDP:
>>> # xdoctest: +SKIP
>>> import torch.distributed.checkpoint as dist_cp
>>> import spmd.checkpoint as sp_cp
>>> # Save
@ -224,7 +225,7 @@ def load_sharded_optimizer_state_dict(
>>> with FSDP.state_dict_type(model_tp, StateDictType.SHARDED_STATE_DICT):
>>> model_state_dict = model_tp.state_dict()
>>> checkpoint = {
>>> "model" = model_state_dict
>>> "model": model_state_dict
>>> }
>>> dist_cp.load_state_dict(
>>> state_dict=checkpoint,

View File

@ -1940,6 +1940,7 @@ def _tensor_to_object(tensor, tensor_size):
buf = tensor.numpy().tobytes()[:tensor_size]
return _unpickler(io.BytesIO(buf)).load()
def _check_for_nccl_backend(group):
pg = group or _get_default_group()
# Gate PG wrapper check on Gloo availability.
@ -1954,6 +1955,7 @@ def _check_for_nccl_backend(group):
pg.name() == Backend.NCCL
)
@exception_handler
def all_gather_object(object_list, obj, group=None):
"""

View File

@ -323,11 +323,10 @@ class ZeroRedundancyOptimizer(Optimizer, Joinable):
Example::
>>> # xdoctest: +SKIP
>>> import torch.nn as nn
>>> from torch.distributed.optim import ZeroRedundancyOptimizer
>>> from torch.nn.parallel import DistributedDataParallel as DDP
>>> # xdoctest: +SKIP
>>> model = nn.Sequential(*[nn.Linear(2000, 2000).to(rank) for _ in range(20)])
>>> ddp = DDP(model, device_ids=[rank])
>>> opt = ZeroRedundancyOptimizer(

View File

@ -30,10 +30,12 @@ def _prepare_input_validate(
func (Callable): Same input function with validation logic added.
Example::
>>> # xdoctest: +SKIP(failing)
>>> @_prepare_input_validate
>>> def make_input_shard_1d(args, kwargs):
>>> ...
>>>
>>> # xdoctest: +SKIP(failing)
>>> input = torch.rand(...)
>>> dtensor = make_input_shard_1d(input, device_mesh, 1)
>>> # This will call '_prepare_input_validate' first
@ -71,14 +73,18 @@ def _prepare_output_validate(
Inject common validation logic for _prepare_output funcs via this
decorator, including verifying that the output is a DTensor
and that only a 1D Device Mesh is passed in.
Example::
>>> # xdoctest: +SKIP(failing)
>>> @_prepare_output_validate
>>> def make_output_shard_1d(args, kwargs):
>>> ...
>>>
>>> # xdoctest: +SKIP(failing)
>>> dt = distribute(tensor, device_mesh, [Shard(0)])
>>> make_output_shard_1d(dt, device_mesh, 1)
>>> # This will call '_prepare_output_validate' first
Args:
_prepare_output_func (Callable): The func we want to inject the
validation into.

View File

@ -61,7 +61,7 @@ def parallelize_module( # type: ignore[return]
Example::
>>> # xdoctest: +SKIP("distributed")
>>> from from torch.distributed._tensor.parallel import parallelize_module, PairwiseParallel
>>> from torch.distributed._tensor.parallel import parallelize_module, PairwiseParallel
>>>
>>> # Define the module.
>>> m = Model(...)

View File

@ -8,6 +8,7 @@ from torch.distributions.utils import broadcast_all, lazy_property
__all__ = ['VonMises']
def _eval_poly(y, coef):
coef = list(coef)
result = coef.pop()
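``_eval_poly`` is a Horner-style polynomial evaluation; a hypothetical completion of the truncated body above:

```python
# Hypothetical sketch: evaluate coef[0] + y*(coef[1] + y*(...)) from the
# highest-order coefficient down (Horner's method).
def _eval_poly(y, coef):
    coef = list(coef)
    result = coef.pop()
    while coef:
        result = coef.pop() + y * result
    return result

# e.g. _eval_poly(2.0, [1.0, 3.0]) == 1.0 + 3.0 * 2.0 == 7.0
```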

View File

@ -267,18 +267,18 @@ def einsum(*args: Any) -> Tensor:
Examples::
>>> # trace
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> # trace
>>> torch.einsum('ii', torch.randn(4, 4))
tensor(-1.2104)
>>> # diagonal
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> # diagonal
>>> torch.einsum('ii->i', torch.randn(4, 4))
tensor([-0.1034, 0.7952, -0.2433, 0.4545])
>>> # outer product
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> # outer product
>>> x = torch.randn(5)
>>> y = torch.randn(4)
>>> torch.einsum('i,j->ij', x, y)
@ -288,8 +288,8 @@ def einsum(*args: Any) -> Tensor:
[ 0.1713, -0.4291, -0.5802, 0.7350],
[ 0.5704, -1.4290, -1.9323, 2.4480]])
>>> # batch matrix multiplication
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> # batch matrix multiplication
>>> As = torch.randn(3, 2, 5)
>>> Bs = torch.randn(3, 5, 4)
>>> torch.einsum('bij,bjk->bik', As, Bs)
@ -302,8 +302,8 @@ def einsum(*args: Any) -> Tensor:
[[ 2.8153, 1.8787, -4.3839, -1.2112],
[ 0.3728, -2.1131, 0.0921, 0.8305]]])
>>> # with sublist format and ellipsis
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> # with sublist format and ellipsis
>>> torch.einsum(As, [..., 0, 1], Bs, [..., 1, 2], [..., 0, 2])
tensor([[[-1.0564, -1.5904, 3.2023, 3.1271],
[-1.6706, -0.8097, -0.8025, -2.1183]],
@ -1604,6 +1604,7 @@ def chain_matmul(*matrices, out=None):
Example::
>>> # xdoctest: +SKIP
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> a = torch.randn(3, 4)
>>> b = torch.randn(4, 5)

View File

@ -144,6 +144,7 @@ class Future(torch._C.Future, Generic[T], metaclass=_PyFutureMeta):
on those futures independently.
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_FUTURES)
>>> def callback(fut):
... print(f"RPC return value is {fut.wait()}.")
>>> fut = torch.futures.Future()
@ -191,8 +192,9 @@ class Future(torch._C.Future, Generic[T], metaclass=_PyFutureMeta):
for handling completion/waiting on those futures independently.
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_FUTURES)
>>> def callback(fut):
... print(f"This will run after the future has finished.")
... print("This will run after the future has finished.")
... print(fut.wait())
>>> fut = torch.futures.Future()
>>> fut.add_done_callback(callback)
@ -223,6 +225,7 @@ class Future(torch._C.Future, Generic[T], metaclass=_PyFutureMeta):
result (object): the result object of this ``Future``.
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_FUTURES)
>>> import threading
>>> import time
>>> def slow_set_future(fut, value):
@ -251,6 +254,7 @@ class Future(torch._C.Future, Generic[T], metaclass=_PyFutureMeta):
result (BaseException): the exception for this ``Future``.
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_FUTURES)
>>> fut = torch.futures.Future()
>>> fut.set_exception(ValueError("foo"))
>>> fut.wait()
@ -281,6 +285,7 @@ def collect_all(futures: List[Future]) -> Future[List[Future]]:
in Futures.
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_FUTURES)
>>> fut0 = torch.futures.Future()
>>> fut1 = torch.futures.Future()
>>> fut = torch.futures.collect_all([fut0, fut1])

View File

@ -36,10 +36,11 @@ class Dispatcher(object):
return self
return _
class VarDispatcher(Dispatcher):
""" A dispatcher that calls functions with variable names
>>> d = VarDispatcher('d')
>>> # xdoctest: +SKIP
>>> d = VarDispatcher('d')
>>> x = var('x')
>>> @d.register('inc', x)
... def f(x):
@ -58,8 +59,6 @@ class VarDispatcher(Dispatcher):
return func(**d)
global_namespace = {} # type: ignore[var-annotated]

View File

@ -7,11 +7,11 @@ def unifiable(cls):
This uses the type and __dict__ or __slots__ attributes to define the
nature of the term
See Also:
>>> # xdoctest: +SKIP
>>> class A(object):
... def __init__(self, a, b):
... self.a = a
... self.b = b
>>> # xdoctest: +SKIP
>>> unifiable(A)
<class 'unification.more.A'>
>>> x = var('x')
@ -33,13 +33,13 @@ def unifiable(cls):
def reify_object(o, s):
""" Reify a Python object with a substitution
>>> # xdoctest: +SKIP
>>> class Foo(object):
... def __init__(self, a, b):
... self.a = a
... self.b = b
... def __str__(self):
... return "Foo(%s, %s)"%(str(self.a), str(self.b))
>>> # xdoctest: +SKIP
>>> x = var('x')
>>> f = Foo(1, x)
>>> print(f)
@ -88,13 +88,13 @@ def _reify(o, s):
def unify_object(u, v, s):
""" Unify two Python objects
Unifies their type and ``__dict__`` attributes
>>> # xdoctest: +SKIP
>>> class Foo(object):
... def __init__(self, a, b):
... self.a = a
... self.b = b
... def __str__(self):
... return "Foo(%s, %s)"%(str(self.a), str(self.b))
>>> # xdoctest: +SKIP
>>> x = var('x')
>>> f = Foo(1, x)
>>> g = Foo(1, 2)
@ -110,6 +110,7 @@ def unify_object(u, v, s):
else:
return unify(u.__dict__, v.__dict__, s)
@dispatch(slice, slice, dict)
def _unify(u, v, s):
""" Unify a Python ``slice`` object """

View File

@ -13,14 +13,16 @@ def dispatch(*types, **kwargs):
Collects implementations based on the function name. Ignores namespaces.
If ambiguous type signatures occur, a warning is raised when the function is
defined, suggesting the additional method needed to break the ambiguity.
Examples
--------
Example:
>>> # xdoctest: +SKIP
>>> @dispatch(int)
... def f(x):
... return x + 1
>>> @dispatch(float)
... def f(x):
... return x - 1
>>> # xdoctest: +SKIP
>>> f(3)
4
>>> f(3.0)

View File

@ -121,6 +121,7 @@ class Dispatcher(object):
def register(self, *types, **kwargs):
""" register dispatcher with new implementation
>>> # xdoctest: +SKIP
>>> f = Dispatcher('f')
>>> @f.register(int)
... def inc(x):
@ -172,6 +173,7 @@ class Dispatcher(object):
def add(self, signature, func):
""" Add new types/method pair to dispatcher
>>> # xdoctest: +SKIP
>>> D = Dispatcher('add')
>>> D.add((int, int), lambda x, y: x + y)
>>> D.add((float, float), lambda x, y: x + y)

View File

@ -44,6 +44,7 @@ def isvariadic(obj):
Whether or not `obj` is variadic
Examples
--------
>>> # xdoctest: +SKIP
>>> isvariadic(int)
False
>>> isvariadic(Variadic[int])
@ -76,8 +77,8 @@ class Variadic(six.with_metaclass(VariadicSignatureMeta)):
representing a specific variadic signature.
Examples
--------
>>> Variadic[int] # any number of int arguments
>>> # xdoctest: +SKIP
>>> Variadic[int] # any number of int arguments
<class 'multipledispatch.variadic.Variadic[int]'>
>>> Variadic[(int, str)] # any number of one of int or str arguments
<class 'multipledispatch.variadic.Variadic[(int, str)]'>

View File

@ -7,6 +7,7 @@ __all__ = ('merge', 'merge_with', 'valmap', 'keymap', 'itemmap',
'valfilter', 'keyfilter', 'itemfilter',
'assoc', 'dissoc', 'assoc_in', 'update_in', 'get_in')
def _get_factory(f, kwargs):
factory = kwargs.pop('factory', dict)
if kwargs:
@ -336,6 +337,7 @@ def get_in(keys, coll, default=None, no_default=False):
raise
return default
def getter(index):
if isinstance(index, list):
if len(index) == 1:
@ -348,6 +350,7 @@ def getter(index):
else:
return operator.itemgetter(index)
def groupby(key, seq):
""" Group a collection by a key function
@ -383,6 +386,7 @@ def groupby(key, seq):
rv[k] = v.__self__ # type: ignore[var-annotated, attr-defined]
return rv
def first(seq):
""" The first element in a sequence

View File

@ -36,8 +36,8 @@ def _toposort(edges):
edges - a dict of the form {a: {b, c}} where b and c depend on a
outputs:
L - an ordered list of nodes that satisfy the dependencies of edges
>>> _toposort({1: (2, 3), 2: (3, )})
>>> # xdoctest: +SKIP
>>> _toposort({1: (2, 3), 2: (3, )})
[1, 2, 3]
Closely follows the wikipedia page [2]
[1] Kahn, Arthur B. (1962), "Topological sorting of large networks",

View File

@ -36,6 +36,7 @@ class Var(object):
def var():
return lambda *args: Var(*args)
def vars():
return lambda n: [var() for i in range(n)]
@ -46,6 +47,7 @@ def isvar(v):
isvar
@dispatch(object) # type: ignore[no-redef]
def isvar(o):
return not not _glv and hashable(o) and o in _glv
@ -53,14 +55,17 @@ def isvar(o):
@contextmanager
def variables(*variables):
""" Context manager for logic variables
"""
Context manager for logic variables
Example:
>>> # xdoctest: +SKIP("undefined vars")
>>> from __future__ import with_statement
>>> with variables(1):
... print(isvar(1))
True
>>> print(isvar(1))
False
>>> # xdoctest: +SKIP("undefined vars")
>>> # Normal approach
>>> from unification import unify
>>> x = var('x')

View File

@ -388,6 +388,7 @@ def list(github, force_reload=False, skip_validation=False, trust_repo=None):
list: The available callable entrypoints
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_HUB)
>>> entrypoints = torch.hub.list('pytorch/vision', force_reload=True)
"""
repo_dir = _get_cache_or_reload(github, force_reload, trust_repo, "list", verbose=True,
@ -440,6 +441,7 @@ def help(github, model, force_reload=False, skip_validation=False, trust_repo=No
Default is ``None`` and will eventually change to ``"check"`` in v1.14.
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_HUB)
>>> print(torch.hub.help('pytorch/vision', 'resnet18', force_reload=True))
"""
repo_dir = _get_cache_or_reload(github, force_reload, trust_repo, "help", verbose=True,
@ -519,6 +521,7 @@ def load(repo_or_dir, model, *args, source='github', trust_repo=None, force_relo
``*args`` and ``**kwargs``.
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_HUB)
>>> # from a github repo
>>> repo = 'pytorch/vision'
>>> model = torch.hub.load(repo, 'resnet50', weights='ResNet50_Weights.IMAGENET1K_V1')
@ -586,6 +589,7 @@ def download_url_to_file(url, dst, hash_prefix=None, progress=True):
Default: True
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_HUB)
>>> # xdoctest: +REQUIRES(POSIX)
>>> torch.hub.download_url_to_file('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth', '/tmp/temporary_file')
@ -694,6 +698,7 @@ def load_state_dict_from_url(
file_name (str, optional): name for the downloaded file. Filename from ``url`` will be used if not set.
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_HUB)
>>> state_dict = torch.hub.load_state_dict_from_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
"""

View File

@ -14,6 +14,7 @@ _impls: Set[str] = set()
# prim is reserved by TorchScript interpreter
_reserved_namespaces = ['prim']
class Library:
"""
A class to create libraries that can be used to register new operators or
@ -57,6 +58,7 @@ class Library:
name of the operator as inferred from the schema.
Example::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LIBRARY)
>>> my_lib = Library("foo", "DEF")
>>> my_lib.define("sum(Tensor self) -> Tensor")
'''
@ -105,7 +107,6 @@ class Library:
"'s behavior for {} dispatch key and {} namespace.".
format(name.split("::")[-1], dispatch_key, self.ns))
if dispatch_key == "Meta":
dispatcher_op_name = name
if '::' not in dispatcher_op_name:
@ -135,6 +136,7 @@ class Library:
_impls.remove(key)
del self.m
# decorator to register python functions for library ops
# Note: this decorator API should remain consistent with `Library.impl` API
def impl(lib, name, dispatch_key=""):
@ -143,6 +145,7 @@ def impl(lib, name, dispatch_key=""):
return f
return wrap
def define(lib, schema, alias_analysis=""):
def wrap(f):
name = lib.define(schema, alias_analysis)
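A hedged end-to-end sketch of the ``Library`` workflow these decorators wrap (the ``mylib``/``twice`` names are illustrative):

```python
# Define a new operator namespace, declare a schema, and register a CPU
# implementation; the op then becomes callable via torch.ops.
import torch
from torch.library import Library

my_lib = Library("mylib", "DEF")  # "DEF" creates a fresh namespace
my_lib.define("twice(Tensor self) -> Tensor")

def twice_impl(x):
    return x * 2

my_lib.impl("twice", twice_impl, "CPU")
out = torch.ops.mylib.twice(torch.ones(3))  # tensor([2., 2., 2.])
```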

View File

@ -8,6 +8,7 @@ if TYPE_CHECKING:
STAT_EVENT = "torch.monitor.Stat"
class TensorboardEventHandler:
"""
TensorboardEventHandler is an event handler that will write known events to
@ -16,6 +17,8 @@ class TensorboardEventHandler:
This currently only supports ``torch.monitor.Stat`` events which are logged
as scalars.
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_MONITOR)
>>> # xdoctest: +REQUIRES(module:tensorboard)
>>> from torch.utils.tensorboard import SummaryWriter
>>> from torch.monitor import TensorboardEventHandler, register_event_handler

View File

@ -14,6 +14,7 @@ __all__ = ['Threshold', 'ReLU', 'RReLU', 'Hardtanh', 'ReLU6', 'Sigmoid', 'Hardsi
'LogSigmoid', 'Softplus', 'Softshrink', 'MultiheadAttention', 'PReLU', 'Softsign', 'Tanhshrink',
'Softmin', 'Softmax', 'Softmax2d', 'LogSoftmax']
class Threshold(Module):
r"""Thresholds each element of the input Tensor.

View File

@ -625,6 +625,7 @@ class SyncBatchNorm(_BatchNorm):
Examples::
>>> # xdoctest: +SKIP
>>> # With Learnable Parameters
>>> m = nn.SyncBatchNorm(100)
>>> # creating process group (optional)
@ -634,7 +635,6 @@ class SyncBatchNorm(_BatchNorm):
>>> # Note: every rank calls into new_group for every
>>> # process group created, even if that rank is not
>>> # part of the group.
>>> # xdoctest: +SKIP
>>> process_groups = [torch.distributed.new_group(pids) for pids in [r1, r2]]
>>> process_group = process_groups[0 if dist.get_rank() <= 3 else 1]
>>> # Without Learnable Parameters

View File

@ -23,10 +23,12 @@ _rnn_impls = {
def _apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
return tensor.index_select(dim, permutation)
def apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
warnings.warn("apply_permutation is deprecated, please use tensor.index_select(dim, permutation) instead")
return _apply_permutation(tensor, permutation, dim)
class RNNBase(Module):
__constants__ = ['mode', 'input_size', 'hidden_size', 'num_layers', 'bias',
'batch_first', 'dropout', 'bidirectional', 'proj_size']

View File

@ -4,6 +4,7 @@ from typing import List, Dict, Any
__all__ = ['consume_prefix_in_state_dict_if_present']
def _ntuple(n, name="parse"):
def parse(x):
if isinstance(x, collections.abc.Iterable):
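A hypothetical completion of the truncated ``_ntuple`` helper (it mirrors the well-known torch.nn.modules.utils pattern):

```python
# Broadcast a scalar to an n-tuple; pass iterables through as tuples.
import collections.abc
from itertools import repeat

def _ntuple(n, name="parse"):
    def parse(x):
        if isinstance(x, collections.abc.Iterable):
            return tuple(x)
        return tuple(repeat(x, n))
    parse.__name__ = name
    return parse

_pair = _ntuple(2, "_pair")  # _pair(3) -> (3, 3); _pair((1, 2)) -> (1, 2)
```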

View File

@ -1375,6 +1375,7 @@ class DistributedDataParallel(Module, Joinable):
Example::
>>> # xdoctest: +SKIP("Distributed")
>>> import torch
>>> import torch.distributed as dist
>>> import os
@ -1548,18 +1549,18 @@ class DistributedDataParallel(Module, Joinable):
Example::
Below is an example of a noop hook that returns the same tensor.
>>> # xdoctest: +SKIP('undefined name')
>>> def noop(state: object, bucket: dist.GradBucket) -> torch.futures.Future[torch.Tensor]:
>>> fut = torch.futures.Future()
>>> fut.set_result(bucket.buffer())
>>> return fut
>>> # xdoctest: +SKIP('undefined name')
>>> ddp.register_comm_hook(state=None, hook=noop)
Example::
Below is an example of a Parallel SGD algorithm where gradients are encoded before
allreduce, and then decoded after allreduce.
>>> # xdoctest: +SKIP('undefined name')
>>> def encode_and_decode(state: object, bucket: dist.GradBucket) -> torch.futures.Future[torch.Tensor]:
>>> encoded_tensor = encode(bucket.buffer()) # encode gradients
>>> fut = torch.distributed.all_reduce(encoded_tensor).get_future()
@ -1568,8 +1569,6 @@ class DistributedDataParallel(Module, Joinable):
>>> decoded_tensor = decode(fut.value()[0]) # decode gradients
>>> return decoded_tensor
>>> return fut.then(decode)
>>> # xdoctest: +SKIP('undefined name')
>>> ddp.register_comm_hook(state=None, hook=encode_and_decode)
"""
self._check_comm_hook(hook)

View File

@ -9,6 +9,7 @@ from .expanded_weights_utils import \
THRESHOLD = 32
def conv_picker(func, conv1dOpt, conv2dOpt, conv3dOpt):
if func == F.conv1d:
return conv1dOpt
@ -18,6 +19,7 @@ def conv_picker(func, conv1dOpt, conv2dOpt, conv3dOpt):
assert func == F.conv3d
return conv3dOpt
def conv_args_and_kwargs(kwarg_names, expanded_args_and_kwargs):
args = expanded_args_and_kwargs[:len(expanded_args_and_kwargs) - len(kwarg_names)]
kwargs = expanded_args_and_kwargs[len(expanded_args_and_kwargs) - len(kwarg_names):]
@ -25,6 +27,7 @@ def conv_args_and_kwargs(kwarg_names, expanded_args_and_kwargs):
return conv_normalizer(*args, **kwargs)
def conv_normalizer(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
return (input, weight), {'bias': bias, 'stride': stride, 'padding': padding, 'dilation': dilation, 'groups': groups}
@ -124,6 +127,7 @@ def conv_backward(func, ctx, grad_output):
set_grad_sample_if_exists(ctx.bias, lambda _: grad_output.reshape(*grad_output.shape[:2], -1).sum(dim=2))
return tuple(results)
def conv_unfold_weight_grad_sample(input, grad_output, weight_shape, kernel_size, stride, padding, dilation, groups, func):
n = input.shape[0]
in_channels = input.shape[1]
@ -158,6 +162,7 @@ def conv_unfold_weight_grad_sample(input, grad_output, weight_shape, kernel_size
weight_grad_sample = weight_grad_sample.view(shape)
return weight_grad_sample
def conv_group_weight_grad_sample(input, grad_output, weight_shape, stride, padding, dilation, batch_size, func):
I = input.shape[1]
O = grad_output.shape[1]
@ -195,9 +200,9 @@ def unfold3d(
A tensor of shape ``(B, C * np.product(kernel_size), L)``, where L is the number of output spatial locations.
See :class:`torch.nn.Unfold` for more details
Example:
>>> # xdoctest: +SKIP
>>> B, C, D, H, W = 3, 4, 5, 6, 7
>>> tensor = torch.arange(1, B * C * D * H * W + 1.).view(B, C, D, H, W)
>>> # xdoctest: +SKIP
>>> unfold3d(tensor, kernel_size=2, padding=0, stride=1).shape
torch.Size([3, 32, 120])
"""

View File

@ -6,6 +6,7 @@ from torch.nn.utils._expanded_weights.expanded_weights_impl import ExpandedWeigh
from torch.utils._pytree import tree_flatten
# dependency on `functional_call` means that this can't be exposed in utils
# without creating circular dependency
def call_for_per_sample_grads(module, *, batch_size=None, loss_reduction="sum"):
@ -28,17 +29,17 @@ def call_for_per_sample_grads(module, *, batch_size=None, loss_reduction="sum"):
running mean across a batch. Must be "mean" or "sum". Default: "sum"
Examples::
>>> # xdoctest: +SKIP
>>> model = nn.Linear(4, 3)
>>> batched_input = torch.randn(5, 4) # batch size of 5
>>> # xdoctest: +SKIP
>>> res = call_for_per_sample_grads(model)(batched_input).sum()
>>> res.backward()
>>> assert model.weight.shape == (3, 4)
>>> assert model.weight.grad_sample.shape == (5, 3, 4)
>>> assert model.weight.grad == None
>>> assert model.weight.grad is None
>>> assert model.bias.shape == (3,)
>>> assert model.bias.grad_sample.shape == (5, 3)
>>> assert model.bias.grad == None
>>> assert model.bias.grad is None
An example using "mean" loss reduction. The grad_sample fields will be scaled by batch_size from what they would be
if we ran the same code with loss_reduction="sum". This is because the mean at the end will scale all

View File

@ -28,8 +28,8 @@ def skip_init(module_cls, *args, **kwargs):
Example::
>>> import torch
>>> # xdoctest: +IGNORE_WANT("non-deterministic")
>>> import torch
>>> m = torch.nn.utils.skip_init(torch.nn.Linear, 5, 1)
>>> m.weight
Parameter containing:

View File

@ -1,5 +1,6 @@
import torch
def convert_conv2d_weight_memory_format(module, memory_format):
r"""Convert ``memory_format`` of ``nn.Conv2d.weight`` to ``memory_format``
The conversion recursively applies to nested ``nn.Module``, including ``module``.
@ -50,6 +51,7 @@ def convert_conv2d_weight_memory_format(module, memory_format):
The original module with updated ``nn.Conv2d``
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
>>> # xdoctest: +REQUIRES(env:CUBLAS_WORKSPACE_CONFIG)
>>> input = torch.randint(1, 10, (2, 8, 4, 4), dtype=torch.float16, device="cuda")
>>> model = nn.Sequential(

View File

@ -1002,7 +1002,7 @@ def ln_structured(module, name, amount, n, dim, importance_scores=None):
module (nn.Module): modified (i.e. pruned) version of the input module
Examples:
>>> # xdoctest: +SKIP
>>> from torch.nn.utils import prune
>>> m = prune.ln_structured(
... nn.Conv2d(5, 3, 2), 'weight', amount=0.3, dim=1, n=float('-inf')
... )
@ -1055,7 +1055,8 @@ def global_unstructured(parameters, pruning_method, importance_scores=None, **kw
scope of global pruning to unstructured methods.
Examples:
>>> # xdoctest: +SKIP
>>> from torch.nn.utils import prune
>>> from collections import OrderedDict
>>> net = nn.Sequential(OrderedDict([
... ('first', nn.Linear(10, 4)),
... ('second', nn.Linear(4, 1)),
@ -1070,7 +1071,7 @@ def global_unstructured(parameters, pruning_method, importance_scores=None, **kw
... amount=10,
... )
>>> print(sum(torch.nn.utils.parameters_to_vector(net.buffers()) == 0))
tensor(10, dtype=torch.uint8)
tensor(10)
"""
# ensure parameters is a list or generator of tuples
@ -1156,7 +1157,7 @@ def custom_from_mask(module, name, mask):
module (nn.Module): modified (i.e. pruned) version of the input module
Examples:
>>> # xdoctest: +SKIP
>>> from torch.nn.utils import prune
>>> m = prune.custom_from_mask(
... nn.Linear(5, 3), name='bias', mask=torch.tensor([0, 1, 0])
... )
@ -1211,8 +1212,8 @@ def is_pruned(module):
binary answer to whether ``module`` is pruned.
Examples:
>>> from torch.nn.utils import prune
>>> m = nn.Linear(5, 7)
>>> # xdoctest: +SKIP
>>> print(prune.is_pruned(m))
False
>>> prune.random_unstructured(m, name='weight', amount=0.2)

View File

@ -20,6 +20,7 @@ PackedSequence_.__annotations__ = {'data': torch.Tensor, 'batch_sizes': torch.Te
'sorted_indices': Optional[torch.Tensor],
'unsorted_indices': Optional[torch.Tensor]}
def bind(optional, fn):
if optional is None:
return None
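A hypothetical completion of the truncated ``bind`` helper: apply ``fn`` only when the optional value is present.

```python
# Option/maybe-style map: None propagates, anything else goes through fn.
def bind(optional, fn):
    if optional is None:
        return None
    return fn(optional)

# bind(None, str) -> None; bind(3, str) -> '3'
```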

View File

@ -18,7 +18,8 @@ class DiagnosticEngine:
Examples:
Step 1: Create a set of rules.
>>> rules = infra.RuleCollection.from_list(
>>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d)
>>> rules = infra.RuleCollection.custom_collection_from_list(
... "CustomRuleCollection",
... [
... infra.Rule(
@ -34,6 +35,7 @@ class DiagnosticEngine:
Step 3: Start a new diagnostic context.
>>> with engine.create_diagnostic_context("torch.onnx.export", version="1.0") as context:
... ...
Step 4: Add diagnostics in your code.
... context.diagnose(rules.rule1, infra.Level.ERROR)

View File

@ -63,6 +63,8 @@ class JitScalarType(enum.IntEnum):
Use ``JitScalarType`` to convert from torch and JIT scalar types to ONNX scalar types.
Examples:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_ONNX)
>>> # xdoctest: +IGNORE_WANT("win32 has different output")
>>> JitScalarType.from_value(torch.ones(1, 2)).onnx_type()
TensorProtoDataType.FLOAT

View File

@ -22,6 +22,7 @@ EPOCH_DEPRECATION_WARNING = (
"https://github.com/pytorch/pytorch/issues/new/choose."
)
class LRScheduler(object):
def __init__(self, optimizer, last_epoch=-1, verbose=False):
@ -196,10 +197,10 @@ class LambdaLR(LRScheduler):
each update. Default: ``False``.
Example:
>>> # xdoctest: +SKIP
>>> # Assuming optimizer has two groups.
>>> lambda1 = lambda epoch: epoch // 30
>>> lambda2 = lambda epoch: 0.95 ** epoch
>>> # xdoctest: +SKIP
>>> scheduler = LambdaLR(optimizer, lr_lambda=[lambda1, lambda2])
>>> for epoch in range(100):
>>> train(...)
@ -282,8 +283,8 @@ class MultiplicativeLR(LRScheduler):
each update. Default: ``False``.
Example:
>>> lmbda = lambda epoch: 0.95
>>> # xdoctest: +SKIP
>>> lmbda = lambda epoch: 0.95
>>> scheduler = MultiplicativeLR(optimizer, lr_lambda=lmbda)
>>> for epoch in range(100):
>>> train(...)
@ -365,12 +366,12 @@ class StepLR(LRScheduler):
each update. Default: ``False``.
Example:
>>> # xdoctest: +SKIP
>>> # Assuming optimizer uses lr = 0.05 for all groups
>>> # lr = 0.05 if epoch < 30
>>> # lr = 0.005 if 30 <= epoch < 60
>>> # lr = 0.0005 if 60 <= epoch < 90
>>> # ...
>>> # xdoctest: +SKIP
>>> scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
>>> for epoch in range(100):
>>> train(...)
@ -414,11 +415,11 @@ class MultiStepLR(LRScheduler):
each update. Default: ``False``.
Example:
>>> # xdoctest: +SKIP
>>> # Assuming optimizer uses lr = 0.05 for all groups
>>> # lr = 0.05 if epoch < 30
>>> # lr = 0.005 if 30 <= epoch < 80
>>> # lr = 0.0005 if epoch >= 80
>>> # xdoctest: +SKIP
>>> scheduler = MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)
>>> for epoch in range(100):
>>> train(...)
@ -463,13 +464,13 @@ class ConstantLR(LRScheduler):
each update. Default: ``False``.
Example:
>>> # xdoctest: +SKIP
>>> # Assuming optimizer uses lr = 0.05 for all groups
>>> # lr = 0.025 if epoch == 0
>>> # lr = 0.025 if epoch == 1
>>> # lr = 0.025 if epoch == 2
>>> # lr = 0.025 if epoch == 3
>>> # lr = 0.05 if epoch >= 4
>>> # xdoctest: +SKIP
>>> scheduler = ConstantLR(self.opt, factor=0.5, total_iters=4)
>>> for epoch in range(100):
>>> train(...)
@ -525,13 +526,13 @@ class LinearLR(LRScheduler):
each update. Default: ``False``.
Example:
>>> # xdoctest: +SKIP
>>> # Assuming optimizer uses lr = 0.05 for all groups
>>> # lr = 0.025 if epoch == 0
>>> # lr = 0.03125 if epoch == 1
>>> # lr = 0.0375 if epoch == 2
>>> # lr = 0.04375 if epoch == 3
>>> # lr = 0.05 if epoch >= 4
>>> # xdoctest: +SKIP
>>> scheduler = LinearLR(self.opt, start_factor=0.5, total_iters=4)
>>> for epoch in range(100):
>>> train(...)
@ -617,13 +618,13 @@ class SequentialLR(LRScheduler):
verbose (bool): Does nothing.
Example:
>>> # xdoctest: +SKIP
>>> # Assuming optimizer uses lr = 1. for all groups
>>> # lr = 0.1 if epoch == 0
>>> # lr = 0.1 if epoch == 1
>>> # lr = 0.9 if epoch == 2
>>> # lr = 0.81 if epoch == 3
>>> # lr = 0.729 if epoch == 4
>>> # xdoctest: +SKIP
>>> scheduler1 = ConstantLR(self.opt, factor=0.1, total_iters=2)
>>> scheduler2 = ExponentialLR(self.opt, gamma=0.9)
>>> scheduler = SequentialLR(self.opt, schedulers=[scheduler1, scheduler2], milestones=[2])
@ -670,7 +671,6 @@ class SequentialLR(LRScheduler):
self._last_lr = schedulers[0].get_last_lr()
def step(self):
self.last_epoch += 1
idx = bisect_right(self._milestones, self.last_epoch)
@ -726,13 +726,13 @@ class PolynomialLR(LRScheduler):
each update. Default: ``False``.
Example:
>>> # xdoctest: +SKIP("undefined vars")
>>> # Assuming optimizer uses lr = 0.001 for all groups
>>> # lr = 0.001 if epoch == 0
>>> # lr = 0.00075 if epoch == 1
>>> # lr = 0.00050 if epoch == 2
>>> # lr = 0.00025 if epoch == 3
>>> # lr = 0.0 if epoch >= 4
>>> # xdoctest: +SKIP("undefined vars")
>>> scheduler = PolynomialLR(self.opt, total_iters=4, power=1.0)
>>> for epoch in range(100):
>>> train(...)
@ -846,13 +846,13 @@ class ChainedScheduler(LRScheduler):
schedulers (list): List of chained schedulers.
Example:
>>> # xdoctest: +SKIP
>>> # Assuming optimizer uses lr = 1. for all groups
>>> # lr = 0.09 if epoch == 0
>>> # lr = 0.081 if epoch == 1
>>> # lr = 0.729 if epoch == 2
>>> # lr = 0.6561 if epoch == 3
>>> # lr = 0.59049 if epoch >= 4
>>> # xdoctest: +SKIP
>>> scheduler1 = ConstantLR(self.opt, factor=0.1, total_iters=2)
>>> scheduler2 = ExponentialLR(self.opt, gamma=0.9)
>>> scheduler = ChainedScheduler([scheduler1, scheduler2])
@ -1544,8 +1544,8 @@ class OneCycleLR(LRScheduler):
each update. Default: ``False``.
Example:
>>> data_loader = torch.utils.data.DataLoader(...)
>>> # xdoctest: +SKIP
>>> data_loader = torch.utils.data.DataLoader(...)
>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
>>> scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, steps_per_epoch=len(data_loader), epochs=10)
>>> for epoch in range(10):
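All of these scheduler examples stay skipped because they reference undefined names (``optimizer``, ``train``, ``validate``). For anyone verifying the behavior locally, a runnable variant of the ``LambdaLR`` example (a sketch, not part of the PR):

    import torch

    model = torch.nn.Linear(2, 1)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda epoch: 0.95 ** epoch)
    for epoch in range(3):
        optimizer.step()      # stand-in for train(...) / validate(...)
        scheduler.step()
    print(scheduler.get_last_lr())  # [0.05 * 0.95 ** 3]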

View File

@ -9,6 +9,7 @@ from torch.optim.lr_scheduler import LRScheduler
__all__ = ['AveragedModel', 'update_bn', 'SWALR']
class AveragedModel(Module):
r"""Implements averaged model for Stochastic Weight Averaging (SWA).

View File

@ -49,6 +49,7 @@ __all__ = [
'StorageType',
]
class SourceChangeWarning(Warning):
pass
@ -186,10 +187,12 @@ def _cuda_deserialize(obj, location):
else:
return obj.cuda(device)
def _mps_deserialize(obj, location):
if location == 'mps':
return obj.mps()
def _meta_deserialize(obj, location):
if location == 'meta':
return torch.UntypedStorage(obj.nbytes(), device='meta')
@ -356,6 +359,7 @@ def _check_seekable(f) -> bool:
raise_err_msg(["seek", "tell"], e)
return False
def _check_dill_version(pickle_module) -> None:
'''Checks if using dill as the pickle module, and if so, checks if it is the correct version.
If dill version is lower than 0.3.1, a ValueError is raised.
@ -375,12 +379,14 @@ def _check_dill_version(pickle_module) -> None:
pickle_module.__version__
))
def _check_save_filelike(f):
if not isinstance(f, (str, os.PathLike)) and not hasattr(f, 'write'):
raise AttributeError((
"expected 'f' to be string, path, or a file-like object with "
"a 'write' attribute"))
def save(
obj: object,
f: FILE_LIKE,
@ -420,6 +426,7 @@ def save(
to use the old format, pass the kwarg ``_use_new_zipfile_serialization=False``.
Example:
>>> # xdoctest: +SKIP("makes cwd dirty")
>>> # Save to file
>>> x = torch.tensor([0, 1, 2, 3, 4])
>>> torch.save(x, 'tensor.pt')
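The SKIP here flags a side effect (writing ``tensor.pt`` into the working directory), not a correctness problem. A doctest-safe variant would serialize to an in-memory buffer (sketch):

    import io
    import torch

    buffer = io.BytesIO()
    x = torch.tensor([0, 1, 2, 3, 4])
    torch.save(x, buffer)    # nothing is written to disk, so the cwd stays clean
    buffer.seek(0)
    assert torch.equal(torch.load(buffer), x)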
@ -1087,6 +1094,7 @@ def _get_restore_location(map_location):
return result
return restore_location
class StorageType():
def __init__(self, name):
self.dtype = _get_dtype_from_pickle_storage_type(name)
@ -1094,6 +1102,7 @@ class StorageType():
def __str__(self):
return f'StorageType(dtype={self.dtype})'
def _load(zip_file, map_location, pickle_module, pickle_file='data.pkl', **pickle_load_args):
restore_location = _get_restore_location(map_location)

View File

@ -90,6 +90,8 @@ def make_tensor(
TypeError: If :attr:`dtype` isn't supported by this function.
Examples:
>>> # xdoctest: +SKIP
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
>>> from torch.testing import make_tensor
>>> # Creates a float tensor with values in [-1, 1)
>>> make_tensor((3,), device='cpu', dtype=torch.float32, low=-1, high=1)

View File

@ -54,6 +54,7 @@ def skip_unless_torch_gpu(method: T) -> T:
"""
Test decorator which skips the test unless there's a GPU available to torch.
>>> # xdoctest: +SKIP
>>> @skip_unless_torch_gpu
>>> def test_some_method(self) -> None:
>>> ...

View File

@ -22,6 +22,7 @@ def rename_privateuse1_backend(backend_name: str) -> None:
Example::
>>> # xdoctest: +SKIP("failing")
>>> torch.register_privateuse1_backend("foo")
# This will work, assuming that you've implemented the right C++ kernels
# to implement torch.ones.

View File

@ -912,6 +912,7 @@ def CppExtension(name, sources, *args, **kwargs):
Example:
>>> # xdoctest: +SKIP
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CPP_EXT)
>>> from setuptools import setup
>>> from torch.utils.cpp_extension import BuildExtension, CppExtension
>>> setup(
@ -959,6 +960,7 @@ def CUDAExtension(name, sources, *args, **kwargs):
Example:
>>> # xdoctest: +SKIP
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CPP_EXT)
>>> from setuptools import setup
>>> from torch.utils.cpp_extension import BuildExtension, CUDAExtension
>>> setup(
@ -1006,14 +1008,12 @@ def CUDAExtension(name, sources, *args, **kwargs):
To work around the issue, move the Python binding logic to a pure C++ file.
Example use:
>>> # xdoctest: +SKIP
>>> #include <ATen/ATen.h>
>>> at::Tensor SigmoidAlphaBlendForwardCuda(....)
#include <ATen/ATen.h>
at::Tensor SigmoidAlphaBlendForwardCuda(....)
Instead of:
>>> # xdoctest: +SKIP
>>> #include <torch/extension.h>
>>> torch::Tensor SigmoidAlphaBlendForwardCuda(...)
#include <torch/extension.h>
torch::Tensor SigmoidAlphaBlendForwardCuda(...)
Currently open issue for nvcc bug: https://github.com/pytorch/pytorch/issues/69460
Complete workaround code example: https://github.com/facebookresearch/pytorch3d/commit/cb170ac024a949f1f9614ffe6af1c38d972f7d48
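Note why the ``>>>`` prompts were dropped from the two C++ snippets above: with the prompts, xdoctest would collect them as Python examples and fail to execute them. Plain literal text keeps them visible without being collected; an RST literal block works the same way (sketch):

    def cpp_doc():
        """
        Show C++ as a literal block rather than a fake doctest::

            #include <ATen/ATen.h>
            at::Tensor SigmoidAlphaBlendForwardCuda(...)
        """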
@ -1037,6 +1037,7 @@ def CUDAExtension(name, sources, *args, **kwargs):
Example:
>>> # xdoctest: +SKIP
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CPP_EXT)
>>> CUDAExtension(
... name='cuda_extension',
... sources=['extension.cpp', 'extension_kernel.cu'],
@ -1362,6 +1363,7 @@ def load_inline(name,
causes issues.
Example:
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CPP_EXT)
>>> from torch.utils.cpp_extension import load_inline
>>> source = """
at::Tensor sin_add(at::Tensor x, at::Tensor y) {

View File

@ -33,11 +33,11 @@ def default_convert(data):
data: a single data point to be converted
Examples:
>>> # xdoctest: +SKIP
>>> # Example with `int`
>>> default_convert(0)
0
>>> # Example with NumPy array
>>> # xdoctest: +SKIP
>>> default_convert(np.array([0, 1]))
tensor([0, 1])
>>> # Example with NamedTuple
@ -228,6 +228,7 @@ def default_collate(batch):
batch: a single batch to be collated
Examples:
>>> # xdoctest: +SKIP
>>> # Example with a batch of `int`s:
>>> default_collate([0, 1, 2, 3])
tensor([0, 1, 2, 3])
@ -238,7 +239,6 @@ def default_collate(batch):
>>> default_collate([{'A': 0, 'B': 1}, {'A': 100, 'B': 100}])
{'A': tensor([ 0, 100]), 'B': tensor([ 1, 100])}
>>> # Example with `NamedTuple` inside the batch:
>>> # xdoctest: +SKIP
>>> Point = namedtuple('Point', ['x', 'y'])
>>> default_collate([Point(0, 0), Point(1, 1)])
Point(x=tensor([0, 1]), y=tensor([0, 1]))
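These collate examples are skipped mostly because the docstring never imports ``numpy`` or ``namedtuple``. A self-contained version runs fine (sketch; assumes ``default_collate`` is importable from ``torch.utils.data``, as in recent releases):

    import torch
    from collections import namedtuple
    from torch.utils.data import default_collate

    assert torch.equal(default_collate([0, 1, 2, 3]), torch.tensor([0, 1, 2, 3]))
    Point = namedtuple('Point', ['x', 'y'])
    batch = default_collate([Point(0, 0), Point(1, 1)])
    assert torch.equal(batch.x, torch.tensor([0, 1]))
    assert torch.equal(batch.y, torch.tensor([0, 1]))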

View File

@ -183,7 +183,9 @@ class CollatorIterDataPipe(MapperIterDataPipe):
collate_fn: Customized collate function to collect and combine data or a batch of data.
Default function collates to Tensor(s) based on data type.
Example: Convert integer data to float Tensor
Example:
>>> # xdoctest: +SKIP
>>> # Convert integer data to float Tensor
>>> class MyIterDataPipe(torch.utils.data.IterDataPipe):
... def __init__(self, start, end):
... super(MyIterDataPipe).__init__()
@ -203,7 +205,6 @@ class CollatorIterDataPipe(MapperIterDataPipe):
>>> def collate_fn(batch):
... return torch.tensor(batch, dtype=torch.float)
...
>>> # xdoctest: +SKIP
>>> collated_ds = CollateIterDataPipe(ds, collate_fn=collate_fn)
>>> print(list(collated_ds))
[tensor(3.), tensor(4.), tensor(5.), tensor(6.)]

View File

@ -30,6 +30,7 @@ def validate_input_col(fn: Callable, input_col: Optional[Union[int, tuple, list]
keyword-only arguments.
Examples:
>>> # xdoctest: +SKIP("Failing on some CI machines")
>>> def f(a, b, *, c=1):
>>> return a + b + c
>>> def f_def(a, b=1, *, c=1):
@ -117,6 +118,7 @@ def _is_local_fn(fn):
return "<locals>" in fn_type.__qualname__
return False
def _check_unpickable_fn(fn: Callable):
"""
Checks whether the function is picklable. If it is a lambda or local function, a UserWarning
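For context on the check being documented: lambdas and local (nested) functions cannot be pickled by the stdlib pickler, which is why handing them to multiprocessing DataLoader workers deserves a warning (sketch):

    import pickle

    def make_local():
        def local_fn(x):
            return x + 1
        return local_fn

    try:
        pickle.dumps(make_local())   # local function: not picklable
    except (pickle.PicklingError, AttributeError) as ex:
        print(f"not picklable: {ex}")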

View File

@ -81,6 +81,8 @@ class IterableDataset(Dataset[T_co]):
Example 1: splitting workload across all workers in :meth:`__iter__`::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
>>> # xdoctest: +SKIP("Fails on MacOS12")
>>> class MyIterableDataset(torch.utils.data.IterableDataset):
... def __init__(self, start, end):
... super(MyIterableDataset).__init__()
@ -122,6 +124,7 @@ class IterableDataset(Dataset[T_co]):
Example 2: splitting workload across all workers using :attr:`worker_init_fn`::
>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
>>> class MyIterableDataset(torch.utils.data.IterableDataset):
... def __init__(self, start, end):
... super(MyIterableDataset).__init__()
@ -313,9 +316,12 @@ def random_split(dataset: Dataset[T], lengths: Sequence[Union[int, float]],
Optionally fix the generator for reproducible results, e.g.:
>>> random_split(range(10), [3, 7], generator=torch.Generator().manual_seed(42))
>>> random_split(range(30), [0.3, 0.3, 0.4], generator=torch.Generator(
... ).manual_seed(42))
Example:
>>> # xdoctest: +SKIP
>>> generator1 = torch.Generator().manual_seed(42)
>>> generator2 = torch.Generator().manual_seed(42)
>>> random_split(range(10), [3, 7], generator=generator1)
>>> random_split(range(30), [0.3, 0.3, 0.4], generator=generator2)
Args:
dataset (Dataset): Dataset to be split
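For reference, a runnable version of the rewritten example above (the docstring stays skipped because the repr of the returned ``Subset`` objects is not stable doctest output):

    import torch
    from torch.utils.data import random_split

    generator = torch.Generator().manual_seed(42)
    train, val = random_split(range(10), [3, 7], generator=generator)
    print(len(train), len(val))  # 3 7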

View File

@ -53,6 +53,7 @@ __all__ = ['InputError', 'openf', 'bcolors', 'GeneratedFileCleaner', 'match_exte
'is_caffe2_gpu_file', 'Trie', 'preprocessor', 'file_specific_replacement', 'file_add_header',
'fix_static_global_kernels', 'extract_arguments', 'str2bool', 'hipify']
class InputError(Exception):
# Exception raised for errors in the input.
@ -79,6 +80,7 @@ class bcolors:
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
# To the programmer, the outputs of hipify are most likely intermediates.
# This class allows users of hipify to request a cleanup by running the
# hipify step and the compilation inside a ``with`` block that instantiates
# this context manager class
@ -119,13 +121,16 @@ class GeneratedFileCleaner:
for d in self.dirs_to_clean[::-1]:
os.rmdir(d)
def match_extensions(filename: str, extensions: Iterable) -> bool:
"""Helper method to see if filename ends with certain extension"""
return any(filename.endswith(e) for e in extensions)
def _fnmatch(filepath, patterns):
return any(fnmatch.fnmatch(filepath, pattern) for pattern in patterns)
def matched_files_iter(
root_path: str,
includes: Iterable = (),
@ -407,10 +412,8 @@ def find_closure_group(input_string, start, group):
find_closure_group returns the positions of group[0] and group[1] as a tuple.
Example:
find_closure_group("(hi)", 0, ["(", ")"])
Returns:
0, 3
>>> find_closure_group("(hi)", 0, ["(", ")"])
(0, 3)
"""
inside_parenthesis = False
@ -522,7 +525,7 @@ def get_hip_file_path(rel_filepath, is_pytorch_extension=False):
"""
# At the moment, some PyTorch source files are HIPified in place. The predicate
# is_out_of_place tells us if this is the case or not.
assert(not os.path.isabs(rel_filepath))
assert not os.path.isabs(rel_filepath)
if not is_pytorch_extension and not is_out_of_place(rel_filepath):
return rel_filepath
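The ``assert(...)`` to ``assert ...`` cleanups in this file are more than style: ``assert`` is a statement, and as soon as a message is added the parenthesized form asserts a two-element tuple, which is always truthy (sketch):

    import os.path

    rel = "torch/foo.py"
    assert (not os.path.isabs(rel), "expected a relative path")  # always passes!
    assert not os.path.isabs(rel), "expected a relative path"    # actually checks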
@ -589,7 +592,7 @@ def get_hip_file_path(rel_filepath, is_pytorch_extension=False):
def is_out_of_place(rel_filepath):
assert(not os.path.isabs(rel_filepath))
assert not os.path.isabs(rel_filepath)
if rel_filepath.startswith("torch/"):
return False
if rel_filepath.startswith("tools/autograd/templates/"):
@ -599,7 +602,7 @@ def is_out_of_place(rel_filepath):
# Keep this synchronized with includes/ignores in build_amd.py
def is_pytorch_file(rel_filepath):
assert(not os.path.isabs(rel_filepath))
assert not os.path.isabs(rel_filepath)
if rel_filepath.startswith("aten/"):
if rel_filepath.startswith("aten/src/ATen/core/"):
return False
@ -616,8 +619,9 @@ def is_cusparse_file(rel_filepath):
return "sparse" in rel_filepath.lower()
return False
def is_caffe2_gpu_file(rel_filepath):
assert(not os.path.isabs(rel_filepath))
assert not os.path.isabs(rel_filepath)
if rel_filepath.startswith("c10/cuda"):
return True
filename = os.path.basename(rel_filepath)
@ -732,6 +736,8 @@ Returns a dict with the following keys:
"skipped" if an identical hipified file already existed or hipified file couldn't be written out
"ignored" if the source file was a hipified file itself or not meant to be hipified
"""
def preprocessor(
output_directory: str,
filepath: str,
@ -885,6 +891,7 @@ def preprocessor(
else:
return {"hipified_path": fout_path, "status": "[skipped, already hipified]"}
def file_specific_replacement(filepath, search_string, replace_string, strict=False):
with openf(filepath, "r+") as f:
contents = f.read()