pytorch/torch/_lazy/__init__.py
Shunting Zhang 19747cbbe6 Dynamo+LTC: merging related code from staging branch to master (#75046)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/75046

Merge the code needed for the Dynamo+LTC integration from the staging branch to the master branch.

Test Plan:
Unit test
```
pytest test_extract_compiled_graph
```
Test through Dynamo
```
LTC_TS_CUDA=1 time python torchbench.py --speedup-ltc -dcuda --nvfuser --randomize-input --only <model name>
```

Reviewed By: alanwaketan

Differential Revision: D35300646

Pulled By: shunting314

fbshipit-source-id: 09ed20d3bb8ef80e4b93ba87ea3356a07d2dccdb
(cherry picked from commit 2b56771cdfd2cfa825c65ee9fd42338fb372fb32)
2022-04-02 00:23:15 +00:00


import torch._C._lazy


def mark_step(device: str = "lazy:0", wait=False):
    """Triggers a mark step, which amounts to
    - collecting a group of 'live' lazy tensors to index into the compilation cache
      (lowering/compiling their IR graphs if not cached)
    - kicking off execution of the compiled function
    - (optionally, wait=True) waiting for cpu-side execution to complete (does not sync the accelerator)
    """
    # TODO(whc) expand this to include backend hooks and align with XLA backend needs
    torch._C._lazy._mark_step(device, [], wait=wait)


def wait_device_ops(devices=None):
    """Waits for all the async operations on the given devices to complete.

    Args:
        devices (string..., optional): The devices whose async ops need to be waited
            for. If empty, all the local devices will be waited for.
    """
    if devices is None:
        devices = []
    torch._C._lazy._wait_device_ops(devices=devices)


def sync_multi(tensors, devices):
    """
    Sync the list of lazy tensors so their IR graphs get lowered for the active backend
    and the compiled computation graphs get cached.
    """
    torch._C._lazy._sync_multi(tensors, devices)


def get_tensor_id(tensor):
    """Return the unique id of the lazy tensor maintained by LTC."""
    return torch._C._lazy._get_tensor_id(tensor)
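
For context, a minimal usage sketch of these helpers (not part of the file above). It assumes the TorchScript lazy backend can be registered via `torch._lazy.ts_backend.init()`, which this commit does not show; backend setup may differ in your build.

```python
# Hypothetical usage sketch, assuming the TorchScript lazy backend is
# registered via torch._lazy.ts_backend.init() (not shown in this commit).
import torch
import torch._lazy
import torch._lazy.ts_backend

torch._lazy.ts_backend.init()

# Ops on "lazy" tensors are recorded as IR rather than executed eagerly.
x = torch.randn(2, 2, device="lazy")
y = x + x

# Lower/compile the accumulated IR graph and kick off execution.
torch._lazy.mark_step()

# Block until pending async ops on all local devices have finished.
torch._lazy.wait_device_ops()

# Unique id LTC assigned to the lazy tensor backing `y`.
print(torch._lazy.get_tensor_id(y))
```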