mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[ROCm] Enable inductor GEMM lowering for gfx11 (#141687)
This check doesn't make sense for some AMD GPUs: they have a sufficient number of CUs, but on RDNA architectures `multi_processor_count` reports WGPs (workgroup processors) rather than CUs, even though the hardware performs adequately. As a result, many tests fail on modern archs because this check defaults them away from the GEMM backend. Pull Request resolved: https://github.com/pytorch/pytorch/pull/141687 Approved by: https://github.com/pruthvistony, https://github.com/jeffdaily, https://github.com/malfet Co-authored-by: Jeff Daily <jeff.daily@amd.com>
This commit is contained in:
parent
1f3d8896bc
commit
5c2584a14c
|
|
@ -1114,8 +1114,18 @@ class DelayReplaceLine(DeferredLineBase):
|
|||
|
||||
@functools.lru_cache(None)
|
||||
def is_big_gpu(index) -> bool:
|
||||
prop = torch.cuda.get_device_properties(index)
|
||||
|
||||
# SM logic is not relevant to ROCm gpus
|
||||
# Arbitrarily skipping the older models
|
||||
if torch.version.hip:
|
||||
if prop.major < 9 or prop.major == 10:
|
||||
log.warning("GPU arch does not support max_autotune_gemm mode usage")
|
||||
return False
|
||||
return True
|
||||
|
||||
min_sms = 68 # 3080
|
||||
avail_sms = torch.cuda.get_device_properties(index).multi_processor_count
|
||||
avail_sms = prop.multi_processor_count
|
||||
if avail_sms < min_sms:
|
||||
log.warning(
|
||||
"Not enough SMs to use max_autotune_gemm mode",
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user