mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/50622
1. Define a DDPLoggingData struct that serves as the placeholder for all DDP-related logging fields.
2. Put the DDPLoggingData struct in the C10 directory so that it can be easily imported by both c10 and torch files.
3. Expose a get_ddp_logging_data() method in Python so that users can retrieve the logging data and dump it in their applications.
4. Add a unit test verifying that the logging data can be set and retrieved as expected.
5. A follow-up will add more logging fields such as perf stats, internal states, environment variables, etc.
ghstack-source-id: 120275870 Test Plan: unit tests Reviewed By: SciPioneer Differential Revision: D25930527 fbshipit-source-id: 290c200161019c58e28eed9a5a2a7a8153113f99
52 lines
1.5 KiB
Python
52 lines
1.5 KiB
Python
|
|
import torch
|
|
import sys
|
|
|
|
|
|
def is_available():
    """
    Report whether the distributed package is available.

    Returns ``True`` when ``torch._C`` exposes the ``_c10d_init`` attribute and
    ``False`` otherwise. When it is not available, ``torch.distributed`` does
    not expose any other APIs.

    ``torch.distributed`` is currently available on Linux, MacOS and Windows.
    When building PyTorch from source, set ``USE_DISTRIBUTED=1`` to enable it.
    The current default is ``USE_DISTRIBUTED=1`` for Linux and Windows and
    ``USE_DISTRIBUTED=0`` for MacOS.
    """
    # The presence of the c10d initializer on the C extension module is the
    # sole availability signal used by this package.
    c_bindings = torch._C
    return hasattr(c_bindings, "_c10d_init")
|
|
|
|
|
|
# Initialize the c10d bindings once at import time, but only when the
# distributed package is available; a falsy result from the initializer is
# treated as a hard failure.
if is_available():
    if not torch._C._c10d_init():
        raise RuntimeError("Failed to initialize torch.distributed")
|
|
|
|
|
|
if is_available():
    # Public classes re-exported from the c10d C bindings.
    from torch._C._distributed_c10d import (
        Store,
        FileStore,
        TCPStore,
        ProcessGroup,
        Reducer,
        BuiltinCommHookType,
    )

    # Private constants and helpers re-exported from the same bindings.
    from torch._C._distributed_c10d import (
        _DEFAULT_FIRST_BUCKET_BYTES,
        _GradBucket,
        _register_comm_hook,
        _register_builtin_comm_hook,
        _broadcast_coalesced,
        _compute_bucket_assignment_by_size,
        _test_python_store,
        _set_construction_logging_data,
        _get_ddp_logging_data,
    )

    # These two names are only imported on non-Windows platforms.
    if sys.platform != 'win32':
        from torch._C._distributed_c10d import (
            HashStore,
            _round_robin_process_groups,
        )

    from .distributed_c10d import *

    # Variables prefixed with an underscore are not picked up by the star
    # import above, so `_backend` is imported explicitly. See the comment in
    # `distributed_c10d.py` above `_backend` on why we expose this.
    from .distributed_c10d import _backend
|