mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 00:20:18 +01:00
[Inductor] Limit fusions to a node distance of 64 (#154688)
fix for https://github.com/pytorch/pytorch/issues/154652 and https://fb.workplace.com/groups/1075192433118967/permalink/1484799079148049/ [window 128 dashboard run here w/ no regressions](https://hud.pytorch.org/benchmark/compilers?dashboard=torchinductor&startTime=Sun%2C%2001%20Jun%202025%2006%3A38%3A41%20GMT&stopTime=Sun%2C%2008%20Jun%202025%2006%3A38%3A41%20GMT&granularity=hour&mode=inference&dtype=bfloat16&deviceName=cuda%20(a100)&lBranch=mlazos/fuse-window&lCommit=8576f00ebfa53567d7bddc89d9882df9eb990561&rBranch=main&rCommit=9d59b516e9b3026948918e3ff8c2ef55a33d13ad) Pull Request resolved: https://github.com/pytorch/pytorch/pull/154688 Approved by: https://github.com/eellison, https://github.com/Raymo111
This commit is contained in:
parent
8b8684466a
commit
5dfe1787b5
|
|
@ -582,6 +582,9 @@ max_epilogue_benchmarked_choices = 1
|
|||
# how many nodes to allow into a single fusion
|
||||
max_fusion_size = 64
|
||||
|
||||
# how many nodes to attempt pairwise fusion with in a buffer group
|
||||
max_fusion_buffer_group_pairwise_attempts = 64
|
||||
|
||||
# max number of inputs to generate cat as a pointwise op with masked loads
|
||||
max_pointwise_cat_inputs = 8
|
||||
|
||||
|
|
|
|||
|
|
@ -3229,7 +3229,11 @@ class Scheduler:
|
|||
|
||||
def check_all_pairs(nodes: list[BaseSchedulerNode]) -> None:
|
||||
for node1_index, node1 in enumerate(nodes):
|
||||
for node2 in nodes[node1_index + 1 :]:
|
||||
for node2 in nodes[
|
||||
node1_index + 1 : node1_index
|
||||
+ 1
|
||||
+ config.max_fusion_buffer_group_pairwise_attempts
|
||||
]:
|
||||
key = (node1, node2)
|
||||
if key in seen:
|
||||
continue
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user