mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[cutlass backend] Add dynamo timed (#157410)
Differential Revision: [D77631592](https://our.internmc.facebook.com/intern/diff/D77631592/)

Before: [attachment not preserved in this text copy]

After (different run): [attachment not preserved in this text copy]

Pull Request resolved: https://github.com/pytorch/pytorch/pull/157410
Approved by: https://github.com/jingsh
This commit is contained in:
parent
493f42a541
commit
b642a5c118
|
|
@ -13,6 +13,7 @@ from typing import Any, Optional
|
|||
import sympy
|
||||
|
||||
import torch
|
||||
from torch._inductor.runtime.runtime_utils import dynamo_timed
|
||||
from torch._inductor.utils import clear_on_fresh_cache
|
||||
|
||||
from ... import config
|
||||
|
|
@ -278,9 +279,10 @@ def gen_ops() -> dict[Any, Any]:
|
|||
"""
|
||||
Generates all supported CUTLASS operations.
|
||||
"""
|
||||
arch = get_cuda_arch()
|
||||
version = get_cuda_version()
|
||||
return _gen_ops_cached(arch, version)
|
||||
with dynamo_timed("cutlass_utils.gen_ops"):
|
||||
arch = get_cuda_arch()
|
||||
version = get_cuda_version()
|
||||
return _gen_ops_cached(arch, version)
|
||||
|
||||
|
||||
DTYPE_TO_CUTLASS_TYPE = {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from typing import Any, Optional, Union
|
|||
import torch
|
||||
import torch.utils._pytree as pytree
|
||||
from torch._inductor.codegen.cuda.cutlass_cache import maybe_fetch_ops
|
||||
from torch._inductor.runtime.runtime_utils import dynamo_timed
|
||||
from torch._inductor.scheduler import BaseSchedulerNode
|
||||
from torch._inductor.select_algorithm import create_inputs_key
|
||||
from torch._inductor.utils import clear_on_fresh_cache
|
||||
|
|
@ -556,12 +557,15 @@ class CUTLASSGemmTemplate(CUTLASSTemplate, ABC):
|
|||
"""
|
||||
|
||||
ops = self.gen_ops()
|
||||
for name, op in ops:
|
||||
for swizzle in inductor_cuda_config.cutlass_max_profiling_swizzle_options:
|
||||
description = f"{name} swizzle={swizzle}"
|
||||
self.maybe_append_choice(
|
||||
choices, description=description, op=op, swizzle=swizzle
|
||||
)
|
||||
with dynamo_timed("CUTLASSGemmTemplate.maybe_append_choice"):
|
||||
for name, op in ops:
|
||||
for (
|
||||
swizzle
|
||||
) in inductor_cuda_config.cutlass_max_profiling_swizzle_options:
|
||||
description = f"{name} swizzle={swizzle}"
|
||||
self.maybe_append_choice(
|
||||
choices, description=description, op=op, swizzle=swizzle
|
||||
)
|
||||
|
||||
if len(ops) == 0:
|
||||
input_layouts = [node.get_layout() for node in input_nodes]
|
||||
|
|
@ -940,7 +944,8 @@ class CUTLASSGemmTemplate(CUTLASSTemplate, ABC):
|
|||
log.debug("Using cached ops for %s", self.cache_key)
|
||||
return self.filtered_ops_cache[self.cache_key]
|
||||
|
||||
maybe_ops = maybe_fetch_ops()
|
||||
with dynamo_timed("CUTLASSGemmTemplate.maybe_fetch_ops"):
|
||||
maybe_ops = maybe_fetch_ops()
|
||||
if maybe_ops is None:
|
||||
log.debug("Cannot fetch ops from cache, generating ops from scratch")
|
||||
full_ops = cutlass_utils.gen_ops()
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user