mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary: AMD lowering duration is 1.55x longer than H100. Profiling shows hipification related functions took 22% of overall lowering time. This diff cuts that time by safely memoize the trie to regex logic. The trick is to incrementally build a state of the trie during the trie construction. The state is the hash of all the words added to the trie. Differential Revision: D65659445 Pull Request resolved: https://github.com/pytorch/pytorch/pull/140156 Approved by: https://github.com/ColinPeppler Co-authored-by: Kefei Lu <kefeilu@meta.com> |
||
|---|---|---|
| .. | ||
| _strobelight | ||
| _sympy | ||
| backcompat | ||
| benchmark | ||
| bottleneck | ||
| data | ||
| hipify | ||
| jit | ||
| model_dump | ||
| tensorboard | ||
| viz | ||
| __init__.py | ||
| _backport_slots.py | ||
| _config_module.py | ||
| _config_typing.pyi | ||
| _content_store.py | ||
| _contextlib.py | ||
| _cpp_extension_versioner.py | ||
| _cxx_pytree.py | ||
| _device.py | ||
| _exposed_in.py | ||
| _foreach_utils.py | ||
| _freeze.py | ||
| _get_clean_triton.py | ||
| _import_utils.py | ||
| _mode_utils.py | ||
| _ordered_set.py | ||
| _python_dispatch.py | ||
| _pytree.py | ||
| _stats.py | ||
| _thunk.py | ||
| _traceback.py | ||
| _triton.py | ||
| _typing_utils.py | ||
| _zip.py | ||
| backend_registration.py | ||
| bundled_inputs.py | ||
| checkpoint.py | ||
| collect_env.py | ||
| cpp_backtrace.py | ||
| cpp_extension.py | ||
| deterministic.py | ||
| dlpack.py | ||
| file_baton.py | ||
| flop_counter.py | ||
| hooks.py | ||
| mkldnn.py | ||
| mobile_optimizer.py | ||
| model_zoo.py | ||
| module_tracker.py | ||
| show_pickle.py | ||
| throughput_benchmark.py | ||
| weak.py | ||