mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Fixes #112639 ```txt torch/utils/_sympy/value_ranges.py torch/utils/_sympy/value_ranges.py:60 in public class `ValueRanges`: D101: Missing docstring in public class torch/utils/_sympy/value_ranges.py:68 in public method `__init__`: D107: Missing docstring in __init__ torch/utils/_sympy/value_ranges.py:81 in public method `__contains__`: D105: Missing docstring in magic method torch/utils/_sympy/value_ranges.py:86 in public method `tighten`: D400: First line should end with a period (not 'n') torch/utils/_sympy/value_ranges.py:90 in public method `__and__`: D105: Missing docstring in magic method torch/utils/_sympy/value_ranges.py:103 in public method `__or__`: D105: Missing docstring in magic method torch/utils/_sympy/value_ranges.py:113 in public method `is_singleton`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:118 in public method `unknown`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:122 in public method `wrap`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:129 in public method `increasing_map`: D400: First line should end with a period (not ')') torch/utils/_sympy/value_ranges.py:135 in public method `decreasing_map`: D400: First line should end with a period (not ')') torch/utils/_sympy/value_ranges.py:141 in public method `monotone_map`: D400: First line should end with a period (not 'g') torch/utils/_sympy/value_ranges.py:149 in public method `convex_min_zero_map`: D400: First line should end with a period (not '0') torch/utils/_sympy/value_ranges.py:149 in public method `convex_min_zero_map`: D403: First word of the first line should be properly capitalized ('Fn', not 'fn') torch/utils/_sympy/value_ranges.py:158 in public method `coordinatewise_increasing_map`: D205: 1 blank line required between summary line and description (found 0) torch/utils/_sympy/value_ranges.py:158 in public method `coordinatewise_increasing_map`: D400: First line should end with a period 
(not ':') torch/utils/_sympy/value_ranges.py:171 in public method `coordinatewise_monotone_map`: D400: First line should end with a period (not 'e') torch/utils/_sympy/value_ranges.py:180 in private class `SymPyValueRangeAnalysis`: D205: 1 blank line required between summary line and description (found 0) torch/utils/_sympy/value_ranges.py:180 in private class `SymPyValueRangeAnalysis`: D400: First line should end with a period (not 's') torch/utils/_sympy/value_ranges.py:386 in private method `reciprocal`: D210: No whitespaces allowed surrounding docstring text torch/utils/_sympy/value_ranges.py:386 in private method `reciprocal`: D400: First line should end with a period (not 'n') torch/utils/_sympy/value_ranges.py:488 in public class `ValueRangeAnalysis`: D101: Missing docstring in public class torch/utils/_sympy/value_ranges.py:489 in public method `__init__`: D107: Missing docstring in __init__ torch/utils/_sympy/value_ranges.py:501 in public method `bool_handler`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:506 in public method `default_handler`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:511 in public method `load`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:514 in public method `store`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:517 in public method `reduction`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:520 in public method `index_expr`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:525 in public method `to_dtype`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:558 in public method `square`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:562 in public method `neg`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:566 in public method `truncdiv`: D102: Missing docstring in public 
method torch/utils/_sympy/value_ranges.py:577 in public method `sub`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:580 in public method `__getattr__`: D105: Missing docstring in magic method torch/utils/_sympy/value_ranges.py:585 in public function `bound_sympy`: D103: Missing docstring in public function 36 torch/utils/_sympy/value_ranges.py:60 in public class `ValueRanges`: D101: Missing docstring in public class torch/utils/_sympy/value_ranges.py:68 in public method `__init__`: D107: Missing docstring in __init__ torch/utils/_sympy/value_ranges.py:81 in public method `__contains__`: D105: Missing docstring in magic method torch/utils/_sympy/value_ranges.py:86 in public method `tighten`: D400: First line should end with a period (not 'n') torch/utils/_sympy/value_ranges.py:90 in public method `__and__`: D105: Missing docstring in magic method torch/utils/_sympy/value_ranges.py:103 in public method `__or__`: D105: Missing docstring in magic method torch/utils/_sympy/value_ranges.py:113 in public method `is_singleton`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:118 in public method `unknown`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:122 in public method `wrap`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:182 in private class `SymPyValueRangeAnalysis`: D205: 1 blank line required between summary line and description (found 0) torch/utils/_sympy/value_ranges.py:182 in private class `SymPyValueRangeAnalysis`: D400: First line should end with a period (not 's') torch/utils/_sympy/value_ranges.py:388 in private method `reciprocal`: D210: No whitespaces allowed surrounding docstring text torch/utils/_sympy/value_ranges.py:388 in private method `reciprocal`: D400: First line should end with a period (not 'n') torch/utils/_sympy/value_ranges.py:490 in public class `ValueRangeAnalysis`: D101: Missing docstring in public class 
torch/utils/_sympy/value_ranges.py:491 in public method `__init__`: D107: Missing docstring in __init__ torch/utils/_sympy/value_ranges.py:503 in public method `bool_handler`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:508 in public method `default_handler`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:513 in public method `load`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:516 in public method `store`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:519 in public method `reduction`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:522 in public method `index_expr`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:527 in public method `to_dtype`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:560 in public method `square`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:564 in public method `neg`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:568 in public method `truncdiv`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:579 in public method `sub`: D102: Missing docstring in public method torch/utils/_sympy/value_ranges.py:582 in public method `__getattr__`: D105: Missing docstring in magic method torch/utils/_sympy/value_ranges.py:587 in public function `bound_sympy`: D103: Missing docstring in public function 28 torch/utils/viz/_cycles.py torch/utils/viz/_cycles.py:14 in public function `observe_garbage`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:207 in public function `object_annotation`: D205: 1 blank line required between summary line and description (found 0) torch/utils/viz/_cycles.py:207 in public function `object_annotation`: D400: First line should end with a period (not 'g') torch/utils/viz/_cycles.py:256 in public class `Node`: D101: 
Missing docstring in public class torch/utils/viz/_cycles.py:262 in public function `create_graph`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:308 in public function `escape`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:312 in public function `is_cuda_tensor`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:315 in public function `cuda_allocation_context`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:335 in public function `to_dot`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:406 in public function `to_html`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:416 in public function `observe_tensor_cycles`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:429 in public function `warn_tensor_cycles`: D205: 1 blank line required between summary line and description (found 0) torch/utils/viz/_cycles.py:429 in public function `warn_tensor_cycles`: D400: First line should end with a period (not 'p') torch/utils/viz/_cycles.py:429 in public function `warn_tensor_cycles`: D401: First line should be in imperative mood; try rephrasing (found 'Reference') 14 torch/utils/viz/_cycles.py:14 in public function `observe_garbage`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:256 in public class `Node`: D101: Missing docstring in public class torch/utils/viz/_cycles.py:262 in public function `create_graph`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:308 in public function `escape`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:312 in public function `is_cuda_tensor`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:315 in public function `cuda_allocation_context`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:335 in public function `to_dot`: D103: Missing docstring in public function 
torch/utils/viz/_cycles.py:406 in public function `to_html`: D103: Missing docstring in public function torch/utils/viz/_cycles.py:416 in public function `observe_tensor_cycles`: D103: Missing docstring in public function 9 torch/distributed/argparse_util.py torch/distributed/argparse_util.py:1 at module level: D100: Missing docstring in public module torch/distributed/argparse_util.py:13 in public class `env`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/argparse_util.py:13 in public class `env`: D400: First line should end with a period (not 'g') torch/distributed/argparse_util.py:13 in public class `env`: D412: No blank lines allowed between a section header and its content ('Example') torch/distributed/argparse_util.py:43 in public method `__init__`: D107: Missing docstring in __init__ torch/distributed/argparse_util.py:56 in public method `__call__`: D102: Missing docstring in public method torch/distributed/argparse_util.py:61 in public class `check_env`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/argparse_util.py:61 in public class `check_env`: D400: First line should end with a period (not 's') torch/distributed/argparse_util.py:61 in public class `check_env`: D412: No blank lines allowed between a section header and its content ('Example') torch/distributed/argparse_util.py:97 in public method `__init__`: D107: Missing docstring in __init__ torch/distributed/argparse_util.py:102 in public method `__call__`: D102: Missing docstring in public method 11 torch/distributed/argparse_util.py:1 at module level: D100: Missing docstring in public module torch/distributed/argparse_util.py:43 in public method `__init__`: D107: Missing docstring in __init__ torch/distributed/argparse_util.py:56 in public method `__call__`: D102: Missing docstring in public method torch/distributed/argparse_util.py:97 in public method `__init__`: D107: Missing docstring in __init__ 
torch/distributed/argparse_util.py:102 in public method `__call__`: D102: Missing docstring in public method 5 torch/distributed/_composable_state.py torch/distributed/_composable_state.py:20 in private function `_get_module_state`: D202: No blank lines allowed after function docstring (found 1) torch/distributed/_composable_state.py:20 in private function `_get_module_state`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/_composable_state.py:20 in private function `_get_module_state`: D400: First line should end with a period (not '`') 3 0 torch/distributed/launch.py torch/distributed/launch.py:1 at module level: D205: 1 blank line required between summary line and description (found 0) torch/distributed/launch.py:1 at module level: D400: First line should end with a period (not 'd') torch/distributed/launch.py:156 in public function `parse_args`: D103: Missing docstring in public function torch/distributed/launch.py:171 in public function `launch`: D103: Missing docstring in public function torch/distributed/launch.py:180 in public function `main`: D103: Missing docstring in public function 5 torch/distributed/launch.py:157 in public function `parse_args`: D103: Missing docstring in public function torch/distributed/launch.py:172 in public function `launch`: D103: Missing docstring in public function torch/distributed/launch.py:181 in public function `main`: D103: Missing docstring in public function 3 torch/distributed/remote_device.py torch/distributed/remote_device.py:1 at module level: D100: Missing docstring in public module torch/distributed/remote_device.py:81 in private method `worker_name`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/remote_device.py:81 in private method `worker_name`: D401: First line should be in imperative mood (perhaps 'Return', not 'Returns') torch/distributed/remote_device.py:88 in private method `rank`: D205: 1 blank line required 
between summary line and description (found 0) torch/distributed/remote_device.py:88 in private method `rank`: D401: First line should be in imperative mood (perhaps 'Return', not 'Returns') torch/distributed/remote_device.py:95 in private method `device`: D200: One-line docstring should fit on one line with quotes (found 3) torch/distributed/remote_device.py:95 in private method `device`: D401: First line should be in imperative mood (perhaps 'Return', not 'Returns') 7 torch/distributed/remote_device.py:1 at module level: D100: Missing docstring in public module torch/distributed/remote_device.py:85 in private method `rank`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/remote_device.py:85 in private method `rank`: D401: First line should be in imperative mood (perhaps 'Return', not 'Returns') 3 torch/distributed/rendezvous.py torch/distributed/rendezvous.py:1 at module level: D100: Missing docstring in public module torch/distributed/rendezvous.py:23 in public function `register_rendezvous_handler`: D401: First line should be in imperative mood (perhaps 'Register', not 'Registers') torch/distributed/rendezvous.py:88 in public function `rendezvous`: D103: Missing docstring in public function torch/distributed/rendezvous.py:147 in private function `_create_c10d_store`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/rendezvous.py:147 in private function `_create_c10d_store`: D400: First line should end with a period (not 'r') 5 torch/distributed/rendezvous.py:1 at module level: D100: Missing docstring in public module torch/distributed/rendezvous.py:89 in public function `rendezvous`: D103: Missing docstring in public function 2 torch/distributed/run.py torch/distributed/run.py:9 at module level: D205: 1 blank line required between summary line and description (found 0) torch/distributed/run.py:9 at module level: D400: First line should end with a period (not '`') 
torch/distributed/run.py:393 in public function `get_args_parser`: D202: No blank lines allowed after function docstring (found 1) torch/distributed/run.py:393 in public function `get_args_parser`: D401: First line should be in imperative mood; try rephrasing (found 'Helper') torch/distributed/run.py:610 in public function `parse_args`: D103: Missing docstring in public function torch/distributed/run.py:615 in public function `parse_min_max_nnodes`: D103: Missing docstring in public function torch/distributed/run.py:629 in public function `determine_local_world_size`: D103: Missing docstring in public function torch/distributed/run.py:670 in public function `get_rdzv_endpoint`: D103: Missing docstring in public function torch/distributed/run.py:677 in public function `get_use_env`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/run.py:677 in public function `get_use_env`: D401: First line should be in imperative mood (perhaps 'Retrieve', not 'Retrieves') torch/distributed/run.py:689 in public function `config_from_args`: D103: Missing docstring in public function torch/distributed/run.py:770 in public function `run_script_path`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/run.py:770 in public function `run_script_path`: D401: First line should be in imperative mood (perhaps 'Run', not 'Runs') torch/distributed/run.py:781 in public function `run`: D103: Missing docstring in public function torch/distributed/run.py:804 in public function `main`: D103: Missing docstring in public function 15 torch/distributed/run.py:611 in public function `parse_args`: D103: Missing docstring in public function torch/distributed/run.py:616 in public function `parse_min_max_nnodes`: D103: Missing docstring in public function torch/distributed/run.py:630 in public function `determine_local_world_size`: D103: Missing docstring in public function torch/distributed/run.py:671 in public function 
`get_rdzv_endpoint`: D103: Missing docstring in public function torch/distributed/run.py:691 in public function `config_from_args`: D103: Missing docstring in public function torch/distributed/run.py:784 in public function `run`: D103: Missing docstring in public function torch/distributed/run.py:807 in public function `main`: D103: Missing docstring in public function 7 torch/distributed/__init__.py torch/distributed/__init__.py:1 at module level: D104: Missing docstring in public package torch/distributed/__init__.py:8 in public function `is_available`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/__init__.py:8 in public function `is_available`: D400: First line should end with a period (not ',') torch/distributed/__init__.py:8 in public function `is_available`: D401: First line should be in imperative mood (perhaps 'Return', not 'Returns') 4 torch/distributed/__init__.py:1 at module level: D104: Missing docstring in public package 1 torch/distributed/utils.py:1 at module level: D100: Missing docstring in public module torch/distributed/utils.py:16 in private function `_pack_kwargs`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/utils.py:16 in private function `_pack_kwargs`: D400: First line should end with a period (not ')') torch/distributed/utils.py:47 in private function `_cast_forward_inputs`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/utils.py:88 in private function `_recursive_to`: D200: One-line docstring should fit on one line with quotes (found 3) torch/distributed/utils.py:141 in private function `_p_assert`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/utils.py:141 in private function `_p_assert`: D209: Multi-line docstring closing quotes should be on a separate line torch/distributed/utils.py:141 in private function `_p_assert`: D400: First line should end 
with a period (not 't') torch/distributed/utils.py:141 in private function `_p_assert`: D401: First line should be in imperative mood; try rephrasing (found 'This') torch/distributed/utils.py:275 in private function `_sync_module_states`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/utils.py:275 in private function `_sync_module_states`: D400: First line should end with a period (not 'n') torch/distributed/utils.py:275 in private function `_sync_module_states`: D401: First line should be in imperative mood (perhaps 'Sync', not 'Syncs') torch/distributed/utils.py:300 in private function `_sync_params_and_buffers`: D205: 1 blank line required between summary line and description (found 0) torch/distributed/utils.py:300 in private function `_sync_params_and_buffers`: D400: First line should end with a period (not 'y') torch/distributed/utils.py:300 in private function `_sync_params_and_buffers`: D401: First line should be in imperative mood (perhaps 'Synchronize', not 'Synchronizes') 15 torch/distributed/utils.py:1 at module level: D100: Missing docstring in public module 1 ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/112953 Approved by: https://github.com/weifengpy
199 lines
6.7 KiB
Python
199 lines
6.7 KiB
Python
r"""
|
|
Module ``torch.distributed.launch``.
|
|
|
|
``torch.distributed.launch`` is a module that spawns up multiple distributed
|
|
training processes on each of the training nodes.
|
|
|
|
.. warning::
|
|
|
|
This module is going to be deprecated in favor of :ref:`torchrun <launcher-api>`.
|
|
|
|
The utility can be used for single-node distributed training, in which one or
|
|
more processes per node will be spawned. The utility can be used for either
|
|
CPU training or GPU training. If the utility is used for GPU training,
|
|
each distributed process will be operating on a single GPU. This can achieve
|
|
well-improved single-node training performance. It can also be used in
|
|
multi-node distributed training, by spawning up multiple processes on each node
|
|
for well-improved multi-node distributed training performance as well.
|
|
This will especially be beneficial for systems with multiple Infiniband
|
|
interfaces that have direct-GPU support, since all of them can be utilized for
|
|
aggregated communication bandwidth.
|
|
|
|
In both cases of single-node distributed training or multi-node distributed
|
|
training, this utility will launch the given number of processes per node
|
|
(``--nproc-per-node``). If used for GPU training, this number needs to be less
|
|
or equal to the number of GPUs on the current system (``nproc_per_node``),
|
|
and each process will be operating on a single GPU from *GPU 0 to
|
|
GPU (nproc_per_node - 1)*.
|
|
|
|
**How to use this module:**
|
|
|
|
1. Single-Node multi-process distributed training
|
|
|
|
::
|
|
|
|
python -m torch.distributed.launch --nproc-per-node=NUM_GPUS_YOU_HAVE
|
|
YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3 and all other
|
|
arguments of your training script)
|
|
|
|
2. Multi-Node multi-process distributed training: (e.g. two nodes)
|
|
|
|
|
|
Node 1: *(IP: 192.168.1.1, and has a free port: 1234)*
|
|
|
|
::
|
|
|
|
python -m torch.distributed.launch --nproc-per-node=NUM_GPUS_YOU_HAVE
|
|
--nnodes=2 --node-rank=0 --master-addr="192.168.1.1"
|
|
--master-port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3
|
|
and all other arguments of your training script)
|
|
|
|
Node 2:
|
|
|
|
::
|
|
|
|
python -m torch.distributed.launch --nproc-per-node=NUM_GPUS_YOU_HAVE
|
|
--nnodes=2 --node-rank=1 --master-addr="192.168.1.1"
|
|
--master-port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3
|
|
and all other arguments of your training script)
|
|
|
|
3. To look up what optional arguments this module offers:
|
|
|
|
::
|
|
|
|
python -m torch.distributed.launch --help
|
|
|
|
|
|
**Important Notices:**
|
|
|
|
1. This utility and multi-process distributed (single-node or
|
|
multi-node) GPU training currently only achieves the best performance using
|
|
the NCCL distributed backend. Thus NCCL backend is the recommended backend to
|
|
use for GPU training.
|
|
|
|
2. In your training program, you must parse the command-line argument:
|
|
``--local-rank=LOCAL_PROCESS_RANK``, which will be provided by this module.
|
|
If your training program uses GPUs, you should ensure that your code only
|
|
runs on the GPU device of LOCAL_PROCESS_RANK. This can be done by:
|
|
|
|
Parsing the local_rank argument
|
|
|
|
::
|
|
|
|
>>> # xdoctest: +SKIP
|
|
>>> import argparse
|
|
>>> parser = argparse.ArgumentParser()
|
|
>>> parser.add_argument("--local-rank", type=int)
|
|
>>> args = parser.parse_args()
|
|
|
|
Set your device to local rank using either
|
|
|
|
::
|
|
|
|
>>> torch.cuda.set_device(args.local_rank) # before your code runs
|
|
|
|
or
|
|
|
|
::
|
|
|
|
>>> with torch.cuda.device(args.local_rank):
|
|
>>> # your code to run
|
|
>>> ...
|
|
|
|
3. In your training program, you are supposed to call the following function
|
|
at the beginning to start the distributed backend. It is strongly recommended
|
|
that ``init_method=env://``. Other init methods (e.g. ``tcp://``) may work,
|
|
but ``env://`` is the one that is officially supported by this module.
|
|
|
|
::
|
|
|
|
>>> torch.distributed.init_process_group(backend='YOUR BACKEND',
|
|
>>> init_method='env://')
|
|
|
|
4. In your training program, you can either use regular distributed functions
|
|
or use :func:`torch.nn.parallel.DistributedDataParallel` module. If your
|
|
training program uses GPUs for training and you would like to use
|
|
:func:`torch.nn.parallel.DistributedDataParallel` module,
|
|
here is how to configure it.
|
|
|
|
::
|
|
|
|
>>> model = torch.nn.parallel.DistributedDataParallel(model,
|
|
>>> device_ids=[args.local_rank],
|
|
>>> output_device=args.local_rank)
|
|
|
|
Please ensure that ``device_ids`` argument is set to be the only GPU device id
|
|
that your code will be operating on. This is generally the local rank of the
|
|
process. In other words, the ``device_ids`` needs to be ``[args.local_rank]``,
|
|
and ``output_device`` needs to be ``args.local_rank`` in order to use this
|
|
utility
|
|
|
|
5. Another way to pass ``local_rank`` to the subprocesses via environment variable
|
|
``LOCAL_RANK``. This behavior is enabled when you launch the script with
|
|
``--use-env=True``. You must adjust the subprocess example above to replace
|
|
``args.local_rank`` with ``os.environ['LOCAL_RANK']``; the launcher
|
|
will not pass ``--local-rank`` when you specify this flag.
|
|
|
|
.. warning::
|
|
|
|
``local_rank`` is NOT globally unique: it is only unique per process
|
|
on a machine. Thus, don't use it to decide if you should, e.g.,
|
|
write to a networked filesystem. See
|
|
https://github.com/pytorch/pytorch/issues/12042 for an example of
|
|
how things can go wrong if you don't do this correctly.
|
|
|
|
|
|
|
|
"""
|
|
|
|
import logging
|
|
import warnings
|
|
|
|
from torch.distributed.run import get_args_parser, run
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def parse_args(args):
    """Parse command-line options, adding the legacy ``--use-env`` flag.

    Builds on :func:`torch.distributed.run.get_args_parser` and extends it
    with the ``--use-env`` switch that only this legacy launcher supports.

    Args:
        args: Sequence of argument strings to parse, or ``None`` to fall
            back to ``sys.argv[1:]`` (standard :mod:`argparse` behavior).

    Returns:
        argparse.Namespace: Parsed options, including the ``use_env`` flag.
    """
    parser = get_args_parser()
    # Legacy flag: when set, LOCAL_RANK is exported instead of passing
    # --local-rank to the training script. Both spellings are accepted.
    parser.add_argument(
        "--use-env",
        "--use_env",
        default=False,
        action="store_true",
        help="Use environment variable to pass "
        "'local rank'. For legacy reasons, the default value is False. "
        "If set to True, the script will not pass "
        "--local-rank as argument, and will instead set LOCAL_RANK.",
    )
    return parser.parse_args(args)
|
|
|
|
|
|
def launch(args):
    """Validate the parsed options and hand off to :func:`torch.distributed.run.run`.

    Args:
        args: Namespace produced by :func:`parse_args`; must provide at
            least ``no_python`` and ``use_env``.

    Raises:
        ValueError: If ``--no-python`` was given without ``--use-env`` —
            without Python there is no script argument list to append
            ``--local-rank`` to, so the rank must go through LOCAL_RANK.
    """
    if args.no_python and not args.use_env:
        raise ValueError(
            "When using the '--no-python' flag,"
            " you must also set the '--use-env' flag."
        )
    run(args)
|
|
|
|
|
|
def main(args=None):
    """Run the legacy launcher: warn about deprecation, parse args, launch.

    Args:
        args: Optional sequence of argument strings; ``None`` means use
            ``sys.argv[1:]``.
    """
    # Keep steering users toward torchrun; this module is deprecated.
    warnings.warn(
        "The module torch.distributed.launch is deprecated\n"
        "and will be removed in future. Use torchrun.\n"
        "Note that --use-env is set by default in torchrun.\n"
        "If your script expects `--local-rank` argument to be set, please\n"
        "change it to read from `os.environ['LOCAL_RANK']` instead. See \n"
        "https://pytorch.org/docs/stable/distributed.html#launch-utility for \n"
        "further instructions\n",
        FutureWarning,
    )
    args = parse_args(args)
    launch(args)
|
|
|
|
|
|
# Script entry point: invoke ``main`` only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|