Revert "Enable FlashAttention on Windows (#131906)"

This reverts commit b90bc66766.

Reverted https://github.com/pytorch/pytorch/pull/131906 on behalf of https://github.com/atalman due to Windows nightly failures ([comment](https://github.com/pytorch/pytorch/pull/131906#issuecomment-2256421183))
PyTorch MergeBot 2024-07-29 16:49:23 +00:00
parent 3d4de8e96d
commit 6cf493158e
2 changed files with 2 additions and 2 deletions


@@ -855,7 +855,7 @@ cmake_dependent_option(
"Whether to build the flash_attention kernel for scaled dot product attention.\
Will be disabled if not supported by the platform"
ON
"USE_CUDA OR USE_ROCM; NOT MSVC OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0"
"USE_CUDA OR USE_ROCM;NOT MSVC"
OFF)
# We are currently not using alibi attention for Flash, so we disable this
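
To make the effect of the condition change concrete, here is a minimal Python sketch (illustrative only, not part of this PR; the function name is hypothetical) of the `cmake_dependent_option` gating before and after the revert:

```python
# Illustrative sketch of the USE_FLASH_ATTENTION default, not PyTorch code.
def use_flash_attention_default(use_cuda: bool, use_rocm: bool,
                                is_msvc: bool, cuda_major: int) -> bool:
    """Default for USE_FLASH_ATTENTION under the reverted CMake condition."""
    if not (use_cuda or use_rocm):
        return False  # forced OFF when neither CUDA nor ROCm is enabled
    # After this revert, any MSVC (Windows) build defaults the option OFF,
    # regardless of the CUDA toolkit version.
    return not is_msvc
    # Before the revert the MSVC clause was relaxed to:
    #   not is_msvc or cuda_major >= 12
    # which let Windows builds with CUDA >= 12.0 default the option ON.
```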


@@ -53,7 +53,7 @@ def evaluate_platform_supports_flash_attention():
    if TEST_WITH_ROCM:
        return evaluate_gfx_arch_exact('gfx90a:sramecc+:xnack-') or evaluate_gfx_arch_exact('gfx942:sramecc+:xnack-')
    if TEST_CUDA:
-       return (not IS_WINDOWS or int(torch.version.cuda.split('.')[0]) >= 12) and SM80OrLater
+       return not IS_WINDOWS and SM80OrLater
    return False

def evaluate_platform_supports_efficient_attention():
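
A hedged usage sketch (flag and test names are illustrative, not taken from the PyTorch test suite): helpers like `evaluate_platform_supports_flash_attention()` are typically evaluated into a module-level flag that skips tests on unsupported platforms, which is why dropping the Windows/CUDA-12 clause above disables the flash-attention tests on Windows again.

```python
# Illustrative only: how a capability predicate like the one above is commonly
# consumed in a test suite. Flag and test names are hypothetical.
import sys
import unittest

import torch


def evaluate_platform_supports_flash_attention() -> bool:
    # Simplified stand-in for the helper in the diff (ROCm branch omitted):
    # after the revert it is True only on non-Windows CUDA builds with SM80+.
    if not torch.cuda.is_available():
        return False
    return sys.platform != "win32" and torch.cuda.get_device_capability() >= (8, 0)


PLATFORM_SUPPORTS_FLASH_ATTENTION = evaluate_platform_supports_flash_attention()


class TestFlashAttentionSDPA(unittest.TestCase):
    @unittest.skipIf(not PLATFORM_SUPPORTS_FLASH_ATTENTION,
                     "Flash attention not supported on this platform")
    def test_sdpa_flash_backend(self):
        q = torch.randn(2, 4, 8, 64, device="cuda", dtype=torch.float16)
        k, v = q.clone(), q.clone()
        # Restrict dispatch to the flash-attention backend so the test fails
        # loudly if the kernel is unavailable rather than silently falling back.
        with torch.nn.attention.sdpa_kernel([torch.nn.attention.SDPBackend.FLASH_ATTENTION]):
            out = torch.nn.functional.scaled_dot_product_attention(q, k, v)
        self.assertEqual(out.shape, q.shape)


if __name__ == "__main__":
    unittest.main()
```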