Revert "Enable FlashAttention on Windows (#131906)"
This reverts commit b90bc66766.
Reverted https://github.com/pytorch/pytorch/pull/131906 on behalf of https://github.com/atalman due to Windows nightly failures ([comment](https://github.com/pytorch/pytorch/pull/131906#issuecomment-2256421183))
This commit is contained in:
parent 3d4de8e96d
commit 6cf493158e
@@ -855,7 +855,7 @@ cmake_dependent_option(
   "Whether to build the flash_attention kernel for scaled dot product attention.\
   Will be disabled if not supported by the platform"
   ON
-  "USE_CUDA OR USE_ROCM; NOT MSVC OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0"
+  "USE_CUDA OR USE_ROCM;NOT MSVC"
   OFF)
 
 # We are currenlty not using alibi attention for Flash So we disable this
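To make the restored CMake gate easier to read, here is a minimal Python sketch (not part of this commit; the helper name is hypothetical) of how the dependency string "USE_CUDA OR USE_ROCM;NOT MSVC" behaves: every semicolon-separated clause must hold for USE_FLASH_ATTENTION to keep its ON default, otherwise cmake_dependent_option forces it to OFF.

# Hypothetical sketch of the dependency gate restored by this revert.
# "USE_CUDA OR USE_ROCM;NOT MSVC" is a ;-separated list of clauses that must all
# hold for USE_FLASH_ATTENTION to default to ON; otherwise it is forced to OFF.
def use_flash_attention_default(use_cuda: bool, use_rocm: bool, is_msvc: bool) -> bool:
    return (use_cuda or use_rocm) and not is_msvc

# A Windows/MSVC CUDA build is gated off again, regardless of CUDA toolkit version.
assert use_flash_attention_default(use_cuda=True, use_rocm=False, is_msvc=True) is False
assert use_flash_attention_default(use_cuda=True, use_rocm=False, is_msvc=False) is True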
@@ -53,7 +53,7 @@ def evaluate_platform_supports_flash_attention():
     if TEST_WITH_ROCM:
         return evaluate_gfx_arch_exact('gfx90a:sramecc+:xnack-') or evaluate_gfx_arch_exact('gfx942:sramecc+:xnack-')
     if TEST_CUDA:
-        return (not IS_WINDOWS or int(torch.version.cuda.split('.')[0]) >= 12) and SM80OrLater
+        return not IS_WINDOWS and SM80OrLater
     return False
 
 def evaluate_platform_supports_efficient_attention():
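For reference, here is a minimal standalone sketch (not part of this commit; the function names and arguments are hypothetical stand-ins for IS_WINDOWS, torch.version.cuda and SM80OrLater) of how the CUDA branch of this test-support gate changes with the revert.

# Before the revert (PR #131906): Windows was accepted when the CUDA toolkit was 12 or newer.
def flash_supported_before_revert(is_windows: bool, cuda_major: int, sm80_or_later: bool) -> bool:
    return (not is_windows or cuda_major >= 12) and sm80_or_later

# After the revert: any Windows build reports no flash attention support.
def flash_supported_after_revert(is_windows: bool, sm80_or_later: bool) -> bool:
    return not is_windows and sm80_or_later

# Example: Windows + CUDA 12 + an SM80-or-later GPU flips from supported to unsupported.
assert flash_supported_before_revert(True, 12, True) is True
assert flash_supported_after_revert(True, True) is False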