Revert "Enable FlashAttention on Windows (#131906)"

This reverts commit b90bc66766.

Reverted https://github.com/pytorch/pytorch/pull/131906 on behalf of https://github.com/atalman due to Windows nightly failures ([comment](https://github.com/pytorch/pytorch/pull/131906#issuecomment-2256421183))
PyTorch MergeBot 2024-07-29 16:49:23 +00:00
parent 3d4de8e96d
commit 6cf493158e
2 changed files with 2 additions and 2 deletions


@@ -855,7 +855,7 @@ cmake_dependent_option(
"Whether to build the flash_attention kernel for scaled dot product attention.\
Will be disabled if not supported by the platform"
ON
"USE_CUDA OR USE_ROCM; NOT MSVC OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0"
"USE_CUDA OR USE_ROCM;NOT MSVC"
OFF)
# We are currently not using alibi attention for Flash, so we disable this
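
To make the effect of the condition change concrete, here is a minimal Python sketch (illustrative only, not part of this PR; the function name is hypothetical) of the `cmake_dependent_option` gating before and after the revert:

```python
# Illustrative sketch of the USE_FLASH_ATTENTION default, not PyTorch code.
def use_flash_attention_default(use_cuda: bool, use_rocm: bool,
                                is_msvc: bool, cuda_major: int) -> bool:
    """Default for USE_FLASH_ATTENTION under the reverted CMake condition."""
    if not (use_cuda or use_rocm):
        return False  # forced OFF when neither CUDA nor ROCm is enabled
    # After this revert, any MSVC (Windows) build defaults the option OFF,
    # regardless of the CUDA toolkit version.
    return not is_msvc
    # Before the revert the MSVC clause was relaxed to:
    #   not is_msvc or cuda_major >= 12
    # which let Windows builds with CUDA >= 12.0 default the option ON.
```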


@@ -53,7 +53,7 @@ def evaluate_platform_supports_flash_attention():
    if TEST_WITH_ROCM:
        return evaluate_gfx_arch_exact('gfx90a:sramecc+:xnack-') or evaluate_gfx_arch_exact('gfx942:sramecc+:xnack-')
    if TEST_CUDA:
-       return (not IS_WINDOWS or int(torch.version.cuda.split('.')[0]) >= 12) and SM80OrLater
+       return not IS_WINDOWS and SM80OrLater
    return False

def evaluate_platform_supports_efficient_attention():
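
A hedged usage sketch (flag and test names are illustrative, not taken from the PyTorch test suite): helpers like `evaluate_platform_supports_flash_attention()` are typically evaluated into a module-level flag that skips tests on unsupported platforms, which is why dropping the Windows/CUDA-12 clause above disables the flash-attention tests on Windows again.

```python
# Illustrative only: how a capability predicate like the one above is commonly
# consumed in a test suite. Flag and test names are hypothetical.
import sys
import unittest

import torch


def evaluate_platform_supports_flash_attention() -> bool:
    # Simplified stand-in for the helper in the diff (ROCm branch omitted):
    # after the revert it is True only on non-Windows CUDA builds with SM80+.
    if not torch.cuda.is_available():
        return False
    return sys.platform != "win32" and torch.cuda.get_device_capability() >= (8, 0)


PLATFORM_SUPPORTS_FLASH_ATTENTION = evaluate_platform_supports_flash_attention()


class TestFlashAttentionSDPA(unittest.TestCase):
    @unittest.skipIf(not PLATFORM_SUPPORTS_FLASH_ATTENTION,
                     "Flash attention not supported on this platform")
    def test_sdpa_flash_backend(self):
        q = torch.randn(2, 4, 8, 64, device="cuda", dtype=torch.float16)
        k, v = q.clone(), q.clone()
        # Restrict dispatch to the flash-attention backend so the test fails
        # loudly if the kernel is unavailable rather than silently falling back.
        with torch.nn.attention.sdpa_kernel([torch.nn.attention.SDPBackend.FLASH_ATTENTION]):
            out = torch.nn.functional.scaled_dot_product_attention(q, k, v)
        self.assertEqual(out.shape, q.shape)


if __name__ == "__main__":
    unittest.main()
```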