mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-08 07:39:33 +01:00
This is a new version of #15648 based on the latest master branch. Unlike the previous PR, where I fixed a lot of the doctests in addition to integrating xdoctest, I'm going to reduce the scope here. I'm simply going to integrate xdoctest, and then I'm going to mark all of the failing tests as "SKIP". This will let xdoctest run on the dashboards, provide some value, and still let the dashboards pass. I'll leave fixing the doctests themselves to another PR. In my initial commit, I do the bare minimum to get something running with failing dashboards. The few tests that I marked as skip are causing segfaults. Running xdoctest results in 293 failed and 201 passed tests. The next commits will disable those failing tests. (Unfortunately, I don't have a tool that will insert the `#xdoctest: +SKIP` directive over every failing test, so I'm going to do this mostly manually.) Fixes https://github.com/pytorch/pytorch/issues/71105 @ezyang Pull Request resolved: https://github.com/pytorch/pytorch/pull/82797 Approved by: https://github.com/ezyang
52 lines
1.6 KiB
Python
52 lines
1.6 KiB
Python
|
|
import sys
|
|
import torch
|
|
|
|
|
|
def is_available() -> bool:
    """Return whether ``torch._C`` exposes the distributed autograd init hook.

    The check is purely structural: it reports whether ``torch._C`` has an
    attribute named ``_dist_autograd_init`` and does not call it.
    """
    return hasattr(torch._C, "_dist_autograd_init")
|
|
|
|
|
|
# Call ``torch._C._dist_autograd_init`` only when it exists; a falsy return
# value is reported as a failed initialization at import time.
if is_available() and not torch._C._dist_autograd_init():
    raise RuntimeError("Failed to initialize torch.distributed.autograd")
|
|
|
|
# Pull in the ``torch._C._distributed_autograd`` bindings only when
# ``is_available()`` reports that the init hook is present.
if is_available():
    from torch._C._distributed_autograd import (
        get_gradients,
        backward,
        _init,
        _new_context,
        _release_context,
        _get_max_id,
        _is_valid_context,
        _retrieve_context,
        _current_context,
        _get_debug_info,
        DistAutogradContext,
    )
|
|
|
|
class context:
    """
    Context object to wrap forward and backward passes when using
    distributed autograd. The ``context_id`` generated in the ``with``
    statement is required to uniquely identify a distributed backward pass
    on all workers. Each worker stores metadata associated with this
    ``context_id``, which is required to correctly execute a distributed
    autograd pass.

    Example::
        >>> import torch.distributed.autograd as dist_autograd
        >>> # xdoctest: +SKIP
        >>> with dist_autograd.context() as context_id:
        >>>     t1 = torch.rand((3, 3), requires_grad=True)
        >>>     t2 = torch.rand((3, 3), requires_grad=True)
        >>>     loss = rpc.rpc_sync("worker1", torch.add, args=(t1, t2)).sum()
        >>>     dist_autograd.backward(context_id, [loss])
    """

    def __enter__(self):
        """Create a new autograd context and return its context id.

        The context object is kept on ``self.autograd_context`` so that
        ``__exit__`` can release the same context.
        """
        self.autograd_context = _new_context()
        return self.autograd_context._context_id()

    def __exit__(self, type, value, traceback) -> None:
        """Release the context created by ``__enter__``.

        The exception arguments are unused and nothing is returned, so an
        exception raised inside the ``with`` body still propagates.
        """
        _release_context(self.autograd_context._context_id())
|