mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
Currently there is `test_vertical_fusion1`, which fuses entirely during the lowering stage, so no buffers are realized. This adds `test_scheduler_vertical_fusion1`, which is the same test but with several intermediate calculations realized, so the scheduler is left to do the fusion.

To support the test, this PR also adds:
- `metrics.ir_nodes_pre_fusion`, which, when compared with `generated_kernel_count`, tells us how many nodes were fused.
- `torch._test_inductor_realize`, which is an identity operator in eager mode but, under inductor, also forces the input to be realized.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/90014
Approved by: https://github.com/jansel
23 lines
564 B
Python
23 lines
564 B
Python
# Counters for tracking how many kernels have been generated.
# All of these are module-level state restored by reset().
generated_kernel_count = 0
generated_cpp_vec_kernel_count = 0
num_bytes_accessed = 0
# Kept as a list; reset() empties it in place rather than rebinding it.
# NOTE(review): element type/meaning is not visible here -- confirm at the
# call sites that append to it.
nodes_num_elem = []

# Counters for tracking fusions.
# Comparing this value with generated_kernel_count indicates how many nodes
# were fused.
ir_nodes_pre_fusion = 0


# reset all counters
def reset() -> None:
    """Restore every metric in this module to its initial state.

    The integer counters are rebound to ``0``. ``nodes_num_elem`` is emptied
    in place with ``list.clear()``, so any reference to that list obtained
    before the call sees the emptied list as well.
    """
    global generated_kernel_count
    global generated_cpp_vec_kernel_count
    global num_bytes_accessed, nodes_num_elem
    global ir_nodes_pre_fusion

    generated_kernel_count = 0
    generated_cpp_vec_kernel_count = 0
    num_bytes_accessed = 0
    # Mutate rather than rebind: the list object's identity is preserved.
    nodes_num_elem.clear()
    ir_nodes_pre_fusion = 0