Convert to markdown onnx rst (#155228)

Fixes #155030. Converts the following files to MyST markdown and ensures that the doc tests are green:

- [x] [onnx_dynamo_onnxruntime_backend.rst](https://github.com/pytorch/pytorch/tree/main/docs/source/onnx_dynamo_onnxruntime_backend.rst)
- [x] [onnx_dynamo.rst](https://github.com/pytorch/pytorch/tree/main/docs/source/onnx_dynamo.rst)
- [x] [onnx_ops.rst](https://github.com/pytorch/pytorch/tree/main/docs/source/onnx_ops.rst)
- [onnx_torchscript_supported_aten_ops.rst](https://github.com/pytorch/pytorch/tree/main/docs/source/onnx_torchscript_supported_aten_ops.rst) - not changed as it is autogenerated
- [onnx_torchscript.rst](https://github.com/pytorch/pytorch/tree/main/docs/source/onnx_torchscript.rst) - fixed in #155390

Pull Request resolved: https://github.com/pytorch/pytorch/pull/155228
Approved by: https://github.com/svekars
Co-authored-by: Svetlana Karslioglu <svekars@meta.com>
parent 7a03b0d2ca · commit ae0f1f8984

docs/source/onnx_dynamo.md (new file, 166 lines)
@@ -0,0 +1,166 @@
# TorchDynamo-based ONNX Exporter

```{eval-rst}
.. automodule:: torch.onnx
    :noindex:
```

```{contents}
:local:
:depth: 1
```

## Overview

The ONNX exporter leverages the TorchDynamo engine to hook into Python's frame evaluation API
and dynamically rewrite its bytecode into an FX graph.
The resulting FX graph is then refined before it is finally translated into an ONNX graph.

The main advantage of this approach is that the [FX graph](https://pytorch.org/docs/stable/fx.html) is captured using
bytecode analysis, which preserves the dynamic nature of the model instead of relying on traditional static tracing techniques.

In addition, memory usage during export is significantly lower than with the TorchScript-enabled exporter.
See the {doc}`memory usage documentation <onnx_dynamo_memory_usage>` for more information.

## Dependencies

The ONNX exporter depends on extra Python packages:

- [ONNX](https://onnx.ai)
- [ONNX Script](https://microsoft.github.io/onnxscript)

They can be installed through [pip](https://pypi.org/project/pip/):

```{code-block} bash
pip install --upgrade onnx onnxscript
```

[onnxruntime](https://onnxruntime.ai) can then be used to execute the model
on a large variety of processors.
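
onnxruntime is not pulled in by the command above; assuming the standard PyPI package, it can be installed the same way:

```{code-block} bash
pip install onnxruntime
```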

## A simple example

The following demonstrates the exporter API in action with a simple Multilayer Perceptron (MLP) as an example:

```{code-block} python
import torch
import torch.nn as nn

class MLPModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc0 = nn.Linear(8, 8, bias=True)
        self.fc1 = nn.Linear(8, 4, bias=True)
        self.fc2 = nn.Linear(4, 2, bias=True)
        self.fc3 = nn.Linear(2, 2, bias=True)
        self.fc_combined = nn.Linear(8 + 8 + 8, 8, bias=True)  # Combine all inputs

    def forward(self, tensor_x: torch.Tensor, input_dict: dict, input_list: list):
        """
        Forward method that requires all inputs:
        - tensor_x: A direct tensor input.
        - input_dict: A dictionary containing the tensor under the key 'tensor_x'.
        - input_list: A list where the first element is the tensor.
        """
        # Extract tensors from inputs
        dict_tensor = input_dict['tensor_x']
        list_tensor = input_list[0]

        # Combine all inputs into a single tensor
        combined_tensor = torch.cat([tensor_x, dict_tensor, list_tensor], dim=1)

        # Process the combined tensor through the layers
        combined_tensor = self.fc_combined(combined_tensor)
        combined_tensor = torch.sigmoid(combined_tensor)
        combined_tensor = self.fc0(combined_tensor)
        combined_tensor = torch.sigmoid(combined_tensor)
        combined_tensor = self.fc1(combined_tensor)
        combined_tensor = torch.sigmoid(combined_tensor)
        combined_tensor = self.fc2(combined_tensor)
        combined_tensor = torch.sigmoid(combined_tensor)
        output = self.fc3(combined_tensor)
        return output

model = MLPModel()

# Example inputs
tensor_input = torch.rand((97, 8), dtype=torch.float32)
dict_input = {'tensor_x': torch.rand((97, 8), dtype=torch.float32)}
list_input = [torch.rand((97, 8), dtype=torch.float32)]

# The input_names and output_names are used to identify the inputs and outputs of the ONNX model
input_names = ['tensor_input', 'tensor_x', 'list_input_index_0']
output_names = ['output']

# Exporting the model with all required inputs
onnx_program = torch.onnx.export(
    model,
    (tensor_input, dict_input, list_input),
    dynamic_shapes=(
        {0: "batch_size"},
        {"tensor_x": {0: "batch_size"}},
        [{0: "batch_size"}],
    ),
    input_names=input_names,
    output_names=output_names,
    dynamo=True,
)

# Check that the exported ONNX model is dynamic
assert onnx_program.model.graph.inputs[0].shape == ("batch_size", 8)
assert onnx_program.model.graph.inputs[1].shape == ("batch_size", 8)
assert onnx_program.model.graph.inputs[2].shape == ("batch_size", 8)
```

As the code above shows, all you need to do is provide {func}`torch.onnx.export` with an instance of the model and its inputs.
The exporter will then return an instance of {class}`torch.onnx.ONNXProgram` that contains the exported ONNX graph along with extra information.
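
Before serializing, the result can be sanity-checked: an ``ONNXProgram`` is callable like the original module (a quick sketch, assuming ``onnxruntime`` is installed, since the call executes the graph through ONNX Runtime):

```{code-block} python
# Run the exported program and compare against eager PyTorch.
onnx_outputs = onnx_program(tensor_input, dict_input, list_input)
torch.testing.assert_close(onnx_outputs[0], model(tensor_input, dict_input, list_input))
```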

The in-memory model available through ``onnx_program.model_proto`` is an ``onnx.ModelProto`` object in compliance with the [ONNX IR spec](https://github.com/onnx/onnx/blob/main/docs/IR.md).
The ONNX model may then be serialized into a [Protobuf file](https://protobuf.dev/) using the {meth}`torch.onnx.ONNXProgram.save` API.

```{code-block} python
onnx_program.save("mlp.onnx")
```
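
The saved file can then be executed with ONNX Runtime directly. A minimal sketch, assuming the ``mlp.onnx`` file and the example inputs from above (ONNX Runtime consumes numpy arrays, keyed by the names passed as ``input_names``):

```{code-block} python
import onnxruntime as ort

session = ort.InferenceSession("mlp.onnx")
onnx_outputs = session.run(
    None,  # fetch all outputs
    {
        "tensor_input": tensor_input.numpy(),
        "tensor_x": dict_input["tensor_x"].numpy(),
        "list_input_index_0": list_input[0].numpy(),
    },
)
```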

## Use the same model to compare with the TorchScript-enabled exporter

The biggest difference between the TorchScript-enabled exporter and the TorchDynamo-based exporter is that the latter
requires ``dynamic_shapes`` to follow the same tree structure as the model inputs, while the former
requires a single, flat ``dynamic_axes`` dictionary keyed by input name.

```{code-block} python
torch.onnx.export(
    model,
    (tensor_input, dict_input, list_input),
    "mlp.onnx",
    dynamic_axes={
        "tensor_input": {0: "batch_size"},
        "tensor_x": {0: "batch_size"},
        "list_input_index_0": {0: "batch_size"},
    },
    input_names=input_names,
    output_names=output_names,
)
```

## Inspecting the ONNX model using GUI

You can view the exported model using [Netron](https://netron.app/).

```{image} _static/img/onnx/onnx_dynamo_mlp_model.png
:alt: MLP model as viewed using Netron
:width: 30%
:align: center
```

## When the conversion fails

If the conversion fails, {func}`torch.onnx.export` should be called a second time with
the parameter ``report=True``. A markdown report is generated to help the user
resolve the issue.
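
For example (a sketch reusing the MLP example above; the name and location of the generated report file depend on the run):

```{code-block} python
onnx_program = torch.onnx.export(
    model,
    (tensor_input, dict_input, list_input),
    dynamo=True,
    report=True,  # emit a markdown report describing conversion issues
)
```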

```{toctree}
:hidden:

onnx_dynamo_memory_usage
```

## API Reference

```{eval-rst}
.. autofunction:: torch.onnx.export
.. autoclass:: torch.onnx.ONNXProgram
    :members:
.. autofunction:: is_in_onnx_export
.. autoclass:: torch.onnx.OnnxExporterError
    :members:
.. autofunction:: torch.onnx.enable_fake_mode
```

## Deprecated

The following classes and functions are deprecated and will be removed.

```{eval-rst}
.. autofunction:: torch.onnx.dynamo_export
.. autoclass:: torch.onnx.ExportOptions
```

@@ -1,161 +0,0 @@ docs/source/onnx_dynamo.rst (removed; same content as the markdown above, in reStructuredText)

docs/source/onnx_dynamo_onnxruntime_backend.md (new file, 11 lines)
@@ -0,0 +1,11 @@
# ONNX Backend for TorchDynamo

For a quick overview of `torch.compiler`, see {ref}`torch.compiler_overview`.

```{warning}
The ONNX backend for torch.compile is a rapidly evolving beta technology.
```

```{eval-rst}
.. autofunction:: torch.onnx.is_onnxrt_backend_supported
```
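
As a minimal sketch of how the backend is selected (assuming ``onnxruntime`` and its dependencies are installed):

```{code-block} python
import torch

if torch.onnx.is_onnxrt_backend_supported():
    # Compile an arbitrary callable with the ONNX Runtime backend
    compiled = torch.compile(torch.sin, backend="onnxrt")
    out = compiled(torch.randn(4))
```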

@@ -1,9 +0,0 @@ docs/source/onnx_dynamo_onnxruntime_backend.rst (removed; same content in reStructuredText)

docs/source/onnx_ops.md (new file, 127 lines)
@@ -0,0 +1,127 @@
# torch.onnx.ops

```{eval-rst}
.. automodule:: torch.onnx.ops
```

## Symbolic Operators

Operators that can be used to create arbitrary ONNX ops in the FX graph symbolically.
These operators do no actual computation. It's recommended that you use them
inside an ``if torch.onnx.is_in_onnx_export()`` block.

```{eval-rst}
.. autofunction:: torch.onnx.ops.symbolic
.. autofunction:: torch.onnx.ops.symbolic_multi_out
```
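
A minimal sketch of the intended pattern (the domain, op name, and output description here are hypothetical; check the {func}`torch.onnx.ops.symbolic` reference for the exact keyword arguments):

```{code-block} python
import torch

class CustomOpModule(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if torch.onnx.is_in_onnx_export():
            # Record a symbolic custom_domain::CustomOp node in the exported
            # graph; no computation happens here, so the output dtype and
            # shape must be declared explicitly.
            return torch.onnx.ops.symbolic(
                "custom_domain::CustomOp",
                (x,),
                dtype=x.dtype,
                shape=x.shape,
            )
        return x  # eager fallback used outside of export
```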

## ONNX Operators

The following operators are implemented as native PyTorch ops and can be exported as
ONNX operators. They can be used natively in an ``nn.Module``.

For example, you can define a module:

```{code-block} python
class Model(torch.nn.Module):
    def forward(
        self, input_data, cos_cache_data, sin_cache_data, position_ids_data
    ):
        return torch.onnx.ops.rotary_embedding(
            input_data,
            cos_cache_data,
            sin_cache_data,
            position_ids_data,
        )
```

and export it to ONNX using:

```{code-block} python
model = Model()

input_data = torch.rand(2, 3, 4, 8)
position_ids_data = torch.randint(0, 50, (2, 3)).long()
sin_cache_data = torch.rand(50, 4)
cos_cache_data = torch.rand(50, 4)
dynamic_shapes = {
    "input_data": {0: torch.export.Dim.DYNAMIC},
    "cos_cache_data": None,
    "sin_cache_data": None,
    "position_ids_data": {0: torch.export.Dim.DYNAMIC},
}
onnx_program = torch.onnx.export(
    model,
    (input_data, cos_cache_data, sin_cache_data, position_ids_data),
    dynamic_shapes=dynamic_shapes,
    dynamo=True,
    opset_version=23,
)
```

Printing the ONNX program will show the ONNX operators used in the graph:

```
<...>

graph(
    name=main_graph,
    inputs=(
        %"input_data"<FLOAT,[s0,3,4,8]>,
        %"cos_cache_data"<FLOAT,[50,4]>,
        %"sin_cache_data"<FLOAT,[50,4]>,
        %"position_ids_data"<INT64,[s0,3]>
    ),
    outputs=(
        %"rotary_embedding"<FLOAT,[s0,3,4,8]>
    ),
) {
    0 |  # rotary_embedding
         %"rotary_embedding"<FLOAT,[s0,3,4,8]> ⬅️ ::RotaryEmbedding(%"input_data", %"cos_cache_data", %"sin_cache_data", %"position_ids_data")
    return %"rotary_embedding"<FLOAT,[s0,3,4,8]>
}
```

with the corresponding ``ExportedProgram``:

```{code-block} python
ExportedProgram:
    class GraphModule(torch.nn.Module):
        def forward(self, input_data: "f32[s0, 3, 4, 8]", cos_cache_data: "f32[50, 4]", sin_cache_data: "f32[50, 4]", position_ids_data: "i64[s0, 3]"):
            rotary_embedding: "f32[s0, 3, 4, 8]" = torch.ops.onnx.RotaryEmbedding.opset23(input_data, cos_cache_data, sin_cache_data, position_ids_data);  input_data = cos_cache_data = sin_cache_data = position_ids_data = None
            return (rotary_embedding,)
```

```{eval-rst}
.. autofunction:: torch.onnx.ops.rotary_embedding
```

## ONNX to ATen Decomposition Table

You can use {func}`torch.onnx.ops.aten_decompositions` to obtain a decomposition table
that decomposes the ONNX operators defined above into ATen operators.

```{code-block} python
class Model(torch.nn.Module):
    def forward(
        self, input_data, cos_cache_data, sin_cache_data, position_ids_data
    ):
        return torch.onnx.ops.rotary_embedding(
            input_data,
            cos_cache_data,
            sin_cache_data,
            position_ids_data,
        )

model = Model()

ep = torch.export.export(
    model,
    (input_data, cos_cache_data, sin_cache_data, position_ids_data),
)
# The program can be decomposed into aten ops
ep_decomposed = ep.run_decompositions(torch.onnx.ops.aten_decompositions())
```
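
To confirm the decomposition took effect, the resulting graph can be scanned for remaining ONNX ops (a sketch, assuming the table covers every ONNX operator used by the model):

```{code-block} python
remaining = [
    node
    for node in ep_decomposed.graph.nodes
    if node.op == "call_function" and "onnx" in str(node.target)
]
assert not remaining, f"ONNX ops left after decomposition: {remaining}"
```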

```{eval-rst}
.. autofunction:: torch.onnx.ops.aten_decompositions
```

@@ -1,115 +0,0 @@ docs/source/onnx_ops.rst (removed; same content as the markdown above, in reStructuredText)

@@ -712,4 +712,4 @@ Classes
    :nosignatures:
    :template: classtemplate.rst

    JitScalarType