From 5863476a0520b5dc43e903314e0e41a6aae63b01 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 16 Oct 2025 14:29:02 -0700
Subject: [PATCH] Integrate LLVM at llvm/llvm-project@bfee9db78577

Updates LLVM usage to match
[bfee9db78577](https://github.com/llvm/llvm-project/commit/bfee9db78577)

PiperOrigin-RevId: 820396282
---
 .../xla/third_party/llvm/workspace.bzl        |   4 +-
 .../xla/third_party/shardy/temporary.patch    | 300 ++++--------------
 .../xla/third_party/shardy/workspace.bzl      |   4 +-
 3 files changed, 57 insertions(+), 251 deletions(-)
diff --git a/third_party/xla/third_party/llvm/workspace.bzl b/third_party/xla/third_party/llvm/workspace.bzl
index ed562d40d85..ba4c1a25844 100644
--- a/third_party/xla/third_party/llvm/workspace.bzl
+++ b/third_party/xla/third_party/llvm/workspace.bzl
@@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive")
 
 def repo(name):
     """Imports LLVM."""
-    LLVM_COMMIT = "267fa8dd1efce0b79ebcaa804d54542c99918df2"
-    LLVM_SHA256 = "a72180219b02c46a11fa11d7ca3e5c4f57ecaa348162e010e73a59bd26623950"
+    LLVM_COMMIT = "bfee9db7857757e63b64fb4d411a264690ff711a"
+    LLVM_SHA256 = "b14cb659a35562d1fccee470d0bba41cf96363e1b576e113a3a795db9ad78e3e"
 
     tf_http_archive(
         name = name,
diff --git a/third_party/xla/third_party/shardy/temporary.patch b/third_party/xla/third_party/shardy/temporary.patch
index b544a01d555..edd883f9c23 100644
--- a/third_party/xla/third_party/shardy/temporary.patch
+++ b/third_party/xla/third_party/shardy/temporary.patch
@@ -1,265 +1,71 @@
-diff --git a/shardy/dialect/sdy/ir/utils.cc b/shardy/dialect/sdy/ir/utils.cc
-index ef43fa2..54bdd21 100644
---- a/shardy/dialect/sdy/ir/utils.cc
-+++ b/shardy/dialect/sdy/ir/utils.cc
-@@ -660,17 +660,5 @@ void truncateAxesByRemovingOverlaps(SmallVector<AxisRefAttr>& axes,
-   }
- }
+diff --git a/docs/mpmd/mpmd_sharding_propagation_passes.md b/docs/mpmd/mpmd_sharding_propagation_passes.md
+index a3b51cc..3b40773 100644
+--- a/docs/mpmd/mpmd_sharding_propagation_passes.md
++++ b/docs/mpmd/mpmd_sharding_propagation_passes.md
+@@ -57,10 +57,3 @@ This pass is only applied to MPMD functions in global view and with a
+ homogeneous topology.
  
--bool overlaps(ArrayRef<AxisRefAttr> axisRefs,
--              ArrayRef<AxisRefAttr> otherAxisRefs) {
--  for (AxisRefAttr axisRef : axisRefs) {
--    for (AxisRefAttr otherAxisRef : otherAxisRefs) {
--      if (axisRef.overlaps(otherAxisRef)) {
--        return true;
--      }
--    }
--  }
--  return false;
--}
+ Precondition: all shardings are specified as op attributes and not in types.
 -
- }  // namespace sdy
- }  // namespace mlir
-diff --git a/shardy/dialect/sdy/ir/utils.h b/shardy/dialect/sdy/ir/utils.h
-index e5e9f73..b59ffb7 100644
---- a/shardy/dialect/sdy/ir/utils.h
-+++ b/shardy/dialect/sdy/ir/utils.h
-@@ -572,10 +572,6 @@ std::optional<AxisRefAttr> getPrefixWithoutOverlap(
- void truncateAxesByRemovingOverlaps(SmallVector<AxisRefAttr>& axes,
-                                     ArrayRef<AxisRefAttr> otherAxisRefs);
- 
--// Returns whether `axisRefs` overlaps with `otherAxisRefs`.
--bool overlaps(ArrayRef<AxisRefAttr> axisRefs,
--              ArrayRef<AxisRefAttr> otherAxisRefs);
+-### `-mpmd-simplify-program`
 -
- }  // namespace sdy
- }  // namespace mlir
- 
+-_Removes redundant arg/results from fragments._
+-
+-Simplifies a fragment or loop, its operands and results, and their
+-corresponding block arguments and return values.
 diff --git a/shardy/dialect/sdy/transforms/export/explicit_reshards_util.cc b/shardy/dialect/sdy/transforms/export/explicit_reshards_util.cc
-index cc2c8f5..f84811c 100644
+index 862a2da..be8f02c 100644
 --- a/shardy/dialect/sdy/transforms/export/explicit_reshards_util.cc
 +++ b/shardy/dialect/sdy/transforms/export/explicit_reshards_util.cc
-@@ -103,20 +103,6 @@ AxesPerFactor getCompatibleFactorShardings(
+@@ -433,9 +433,11 @@ class FactorAxesCandidateBag {
+         for (int64_t index = 1; index < factorIndices.size(); index++) {
+           int64_t factorIndex = factorIndices[index];
+           int64_t dependsOn = factorIndices[index - 1];
+-          factorDependenciesMap
+-              .try_emplace(factorIndex, shardingRule.getNumFactors())
+-              .first->second.set(dependsOn);
++          if (!factorDependenciesMap.contains(factorIndex)) {
++            factorDependenciesMap.try_emplace(factorIndex,
++                                              shardingRule.getNumFactors());
++          }
++          factorDependenciesMap[factorIndex].set(dependsOn);
+         }
+       }
      }
+diff --git a/shardy/dialect/sdy/transforms/export/insert_explicit_reshards.cc b/shardy/dialect/sdy/transforms/export/insert_explicit_reshards.cc
+index dcec262..b1d7585 100644
+--- a/shardy/dialect/sdy/transforms/export/insert_explicit_reshards.cc
++++ b/shardy/dialect/sdy/transforms/export/insert_explicit_reshards.cc
+@@ -77,7 +77,7 @@ void insertExplicitReshardsToTargetSharding(OpOperand& opOperand,
+         rewriter, operand.getLoc(), operand,
+         targetSharding
+             ? targetSharding
+-            // Since operand and target shardings are not equivalent and
++            // Since opearand and target shardings are not equivalent and
+             // `targetSharding` is empty, `operandSharding` is guaranteed to be
+             // nonempty.
+             : TensorShardingAttr::getFullyClosedLike(operandSharding));
+@@ -327,7 +327,7 @@ void insertAllReduceOnOpIfUnreducedToReplicated(
+       return sharding && !sharding.getUnreducedAxes().empty();
+     };
+     SDY_CHECK(!llvm::any_of(op->getOpOperands(), operandHasUnreducedAxes))
+-        << "Some operands have unreduced axes but the operation has no "
++        << "Some operands has unreduced axes but the operation has no "
+           "results. ";
+     return;
    }
- 
--  // Detect conflict between reduction factors and output shardings.
--  // TODO(enver): Improve the compile-time performance.
--  for (const int64_t factorIndex : shardingRule.getReductionFactors()) {
--    ArrayRef<AxisRefAttr> reductionSharding = commonAxesPerFactor[factorIndex];
--    for (const TensorFactorShardings& outTensorFactorSharding :
--         shardingProjection.getResults()) {
--      for (const auto& [outFactorIndex, outFactorSharding] :
--           outTensorFactorSharding.factorIndexToSharding) {
--        if (overlaps(reductionSharding, outFactorSharding.axisRefs)) {
--          return {};
--        }
--      }
--    }
--  }
-   return commonAxesPerFactor;
- }
- 
-diff --git a/shardy/dialect/sdy/transforms/export/test/insert_explicit_reshards/gather_scatter.mlir b/shardy/dialect/sdy/transforms/export/test/insert_explicit_reshards/gather_scatter.mlir
-index 3df8060..b55cf25 100644
---- a/shardy/dialect/sdy/transforms/export/test/insert_explicit_reshards/gather_scatter.mlir
-+++ b/shardy/dialect/sdy/transforms/export/test/insert_explicit_reshards/gather_scatter.mlir
-@@ -190,17 +190,3 @@ func.func @scatter_no_reduction(
-   } : (tensor<6x4x10x12x14xf32>, tensor<12x4x2xi64>, tensor<12x2x4x10xf32>) -> tensor<6x4x10x12x14xf32>
-   return %0 : tensor<6x4x10x12x14xf32>
- }
--
--sdy.mesh @mesh = <["x"=2, "y"=2]>
--// CHECK-LABEL: @gather_reduction_factor_sharding_overlaps_with_output_sharding
--func.func @gather_reduction_factor_sharding_overlaps_with_output_sharding(%arg0: tensor<4x2x3xf32> {sdy.sharding = #sdy.sharding<@mesh, [{}, {"y"}, {}]>}, %arg1: tensor<4x2x2x1xi32> {sdy.sharding = #sdy.sharding<@mesh, [{}, {"y"}, {}, {}]>}) -> (tensor<4x2x2x3xf32> {sdy.sharding = #sdy.sharding<@mesh, [{}, {"y"}, {}, {}]>}) {
--  // COM: sdy.sharding_rule = #sdy.op_sharding_rule<([i, m, l], [i, j, k, n])->([i, j, k, l]) {i=4, j=2, k=2, l=3, m=2, n=1} reduction={m} need_replication={n}>
--
--  // CHECK-NEXT: %[[RESHARD:.*]] = sdy.reshard %arg0 <@mesh, [{}, {}, {}]>
--  // CHECK-NEXT: %[[GATHER:.*]] = "stablehlo.gather"(%[[RESHARD]], %arg1)
--  // CHECK-SAME: {sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{}, {"y"}, {}, {}]>]>}
--  // CHECK-NEXT: return %[[GATHER]]
--  %0 = "stablehlo.gather"(%arg0, %arg1) <{dimension_numbers = #stablehlo.gather<offset_dims = [3], collapsed_slice_dims = [1], operand_batching_dims = [0], start_indices_batching_dims = [0], start_index_map = [1], index_vector_dim = 3>, indices_are_sorted = false, slice_sizes = array<i64: 1, 1, 3>}> {sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{}, {"y"}, {}, {}]>]>} : (tensor<4x2x3xf32>, tensor<4x2x2x1xi32>) -> tensor<4x2x2x3xf32>
--  return %0 : tensor<4x2x2x3xf32>
--}
--
-diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch
-index 69e643a..509398d 100644
---- a/third_party/llvm/generated.patch
-+++ b/third_party/llvm/generated.patch
-@@ -1,161 +1 @@
- Auto generated patch. Do not edit or delete it, even if empty.
--diff -ruN --strip-trailing-cr a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
----- a/clang/include/clang/Driver/Options.td
--+++ b/clang/include/clang/Driver/Options.td
--@@ -545,15 +545,16 @@
--               Group<f_Group>;
-- }
-- 
---// Creates a BoolOption where both of the flags are prefixed with "g" and have
---// the Group<g_Group>.
--+// Creates a BoolOption where both of the flags are prefixed with "g".
--+// Does *not* map to g_Group, because that is reserved for flags that are
--+// intended to enable (or disable) debug info, which is not appropriate for a
--+// negative boolean flag (-gno-${feature}).
-- // Used for -cc1 frontend options. Driver-only options do not map to
-- // CompilerInvocation.
-- multiclass BoolGOption<string flag_base, KeyPathAndMacro kpm,
--                        Default default, FlagDef flag1, FlagDef flag2,
--                        BothFlags both = BothFlags<[]>> {
---  defm NAME : BoolOption<"g", flag_base, kpm, default, flag1, flag2, both>,
---              Group<g_Group>;
--+  defm NAME : BoolOption<"g", flag_base, kpm, default, flag1, flag2, both>;
-- }
-- 
-- multiclass BoolMOption<string flag_base, KeyPathAndMacro kpm,
--@@ -4845,8 +4846,7 @@
--                   NegFlag<SetFalse>,
--                   PosFlag<SetTrue, [], [],
--                           "Attach linkage names to C++ constructor/destructor "
---                          "declarations in DWARF."
---                          "Implies -g.">,
--+                          "declarations in DWARF.">,
--                   BothFlags<[], [ClangOption, CLOption, CC1Option]>>,
--                   DocBrief<[{On some ABIs (e.g., Itanium), constructors and destructors may have multiple variants. Historically, when generating DWARF, Clang did not attach ``DW_AT_linkage_name`` to structor DIEs because there were multiple possible manglings (depending on the structor variant) that could be used. With ``-gstructor-decl-linkage-names``, for ABIs with structor variants, we attach a "unified" mangled name to structor declarations DIEs which debuggers can use to look up all the definitions for a structor declaration. E.g., a "unified" mangled name ``_ZN3FooC4Ev`` may have multiple definitions associated with it such as ``_ZN3FooC1Ev`` and ``_ZN3FooC2Ev``.
-- 
--@@ -4855,7 +4855,7 @@
--     CodeGenOpts<"DebugKeyInstructions">, DefaultFalse,
--     NegFlag<SetFalse>, PosFlag<SetTrue, [], [],
--         "Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code"
---        " in some debuggers. DWARF only. Implies -g.">,
--+        " in some debuggers. DWARF only.">,
--     BothFlags<[], [ClangOption, CLOption, CC1Option]>>;
-- def headerpad__max__install__names : Joined<["-"], "headerpad_max_install_names">;
-- def help : Flag<["-", "--"], "help">,
--diff -ruN --strip-trailing-cr a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
----- a/clang/lib/AST/DeclTemplate.cpp
--+++ b/clang/lib/AST/DeclTemplate.cpp
--@@ -1670,20 +1670,25 @@
--     auto P = CTSD->getSpecializedTemplateOrPartial();
--     TemplateParameterList *TPL;
--     if (const auto *CTPSD =
---            dyn_cast<ClassTemplatePartialSpecializationDecl *>(P))
--+            dyn_cast<ClassTemplatePartialSpecializationDecl *>(P)) {
--       TPL = CTPSD->getTemplateParameters();
---    else
---      TPL = cast<ClassTemplateDecl *>(P)->getTemplateParameters();
--+      // FIXME: Obtain Args deduced for the partial specialization.
--+      return {TPL->getParam(Index), {}};
--+    }
--+    TPL = cast<ClassTemplateDecl *>(P)->getTemplateParameters();
--     return {TPL->getParam(Index), CTSD->getTemplateArgs()[Index]};
--   }
--   case Decl::Kind::VarTemplateSpecialization: {
--     const auto *VTSD = cast<VarTemplateSpecializationDecl>(D);
--     auto P = VTSD->getSpecializedTemplateOrPartial();
--     TemplateParameterList *TPL;
---    if (const auto *VTPSD = dyn_cast<VarTemplatePartialSpecializationDecl *>(P))
--+    if (const auto *VTPSD =
--+            dyn_cast<VarTemplatePartialSpecializationDecl *>(P)) {
--       TPL = VTPSD->getTemplateParameters();
---    else
---      TPL = cast<VarTemplateDecl *>(P)->getTemplateParameters();
--+      // FIXME: Obtain Args deduced for the partial specialization.
--+      return {TPL->getParam(Index), {}};
--+    }
--+    TPL = cast<VarTemplateDecl *>(P)->getTemplateParameters();
--     return {TPL->getParam(Index), VTSD->getTemplateArgs()[Index]};
--   }
--   case Decl::Kind::ClassTemplatePartialSpecialization:
--diff -ruN --strip-trailing-cr a/clang/test/DebugInfo/KeyInstructions/flag.cpp b/clang/test/DebugInfo/KeyInstructions/flag.cpp
----- a/clang/test/DebugInfo/KeyInstructions/flag.cpp
--+++ b/clang/test/DebugInfo/KeyInstructions/flag.cpp
--@@ -1,12 +1,15 @@
-- // RUN: %clang -### -target x86_64 -c -gdwarf -gkey-instructions %s 2>&1 | FileCheck %s --check-prefixes=KEY-INSTRUCTIONS
-- // RUN: %clang -### -target x86_64 -c -gdwarf -gno-key-instructions %s 2>&1 | FileCheck %s --check-prefixes=NO-KEY-INSTRUCTIONS
--+// RUN: %clang -### -target x86_64 -c -gno-key-instructions %s 2>&1 | FileCheck %s --check-prefixes=NO-DEBUG
-- 
-- //// Help.
-- // RUN %clang --help | FileCheck %s --check-prefix=HELP
---// HELP: -gkey-instructions  Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code in some debuggers. DWARF only. Implies -g.
--+// HELP: -gkey-instructions  Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code in some debuggers. DWARF only.
-- 
-- // KEY-INSTRUCTIONS: "-gkey-instructions"
-- // NO-KEY-INSTRUCTIONS-NOT: key-instructions
--+// NO-DEBUG-NOT: debug-info-kind
--+// NO-DEBUG-NOT: dwarf
-- 
-- //// Help hidden: flag should not be visible.
-- // RUN: %clang --help | FileCheck %s --check-prefix=HELP
--diff -ruN --strip-trailing-cr a/clang/test/Driver/debug-options.c b/clang/test/Driver/debug-options.c
----- a/clang/test/Driver/debug-options.c
--+++ b/clang/test/Driver/debug-options.c
--@@ -268,11 +268,11 @@
-- // RUN: %clang -### -c %s 2>&1 | FileCheck -check-prefix=NORNGBSE %s
-- // RUN: %clang -### -c -fdebug-ranges-base-address -fno-debug-ranges-base-address %s 2>&1 | FileCheck -check-prefix=NORNGBSE %s
-- //
---// RUN: %clang -### -c -gomit-unreferenced-methods -fno-standalone-debug %s 2>&1 | FileCheck -check-prefix=INCTYPES %s
--+// RUN: %clang -### -c -g -gomit-unreferenced-methods -fno-standalone-debug %s 2>&1 | FileCheck -check-prefix=INCTYPES %s
-- // RUN: %clang -### -c %s 2>&1 | FileCheck -check-prefix=NOINCTYPES %s
---// RUN: %clang -### -c -gomit-unreferenced-methods -fdebug-types-section -target x86_64-unknown-linux %s 2>&1 \
--+// RUN: %clang -### -c -g -gomit-unreferenced-methods -fdebug-types-section -target x86_64-unknown-linux %s 2>&1 \
-- // RUN:        | FileCheck -check-prefix=NOINCTYPES %s
---// RUN: %clang -### -c -gomit-unreferenced-methods -fstandalone-debug %s 2>&1 | FileCheck -check-prefix=NOINCTYPES %s
--+// RUN: %clang -### -c -g -gomit-unreferenced-methods -fstandalone-debug %s 2>&1 | FileCheck -check-prefix=NOINCTYPES %s
-- //
-- // RUN: %clang -### -c -glldb %s 2>&1 | FileCheck -check-prefix=NOPUB %s
-- // RUN: %clang -### -c -glldb -gno-pubnames %s 2>&1 | FileCheck -check-prefix=NOPUB %s
--diff -ruN --strip-trailing-cr a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp
----- a/clang/test/SemaTemplate/concepts.cpp
--+++ b/clang/test/SemaTemplate/concepts.cpp
--@@ -1476,3 +1476,20 @@
-- // expected-error@-1 {{static assertion failed due to requirement 'requires { { &f() } -> C; }'}}
-- 
-- }
--+
--+namespace GH162770 {
--+  enum e {};
--+  template<e> struct s {};
--+
--+  template<typename> struct specialized;
--+  template<e x> struct specialized<s<x>> {
--+    static auto make(auto) -> s<x>;
--+  };
--+
--+  template<e x> struct check {
--+    static constexpr auto m = requires { specialized<s<x>>::make(0); };
--+  };
--+
--+  template<typename... Ts> auto comma = (..., Ts());
--+  auto b = comma<check<e{}>>;
--+} // namespace GH162770
--diff -ruN --strip-trailing-cr a/clang/test/SemaTemplate/partial-spec-instantiate.cpp b/clang/test/SemaTemplate/partial-spec-instantiate.cpp
----- a/clang/test/SemaTemplate/partial-spec-instantiate.cpp
--+++ b/clang/test/SemaTemplate/partial-spec-instantiate.cpp
--@@ -152,3 +152,16 @@
--     ClassTemplate<>::Nested<int> instantiation;
--   }
-- }
--+#if __cplusplus >= 201103L
--+namespace GH162855 {
--+  template <class...> using A = int;
--+  template <class, int> struct B;
--+  template <class...> struct C;
--+  template <template <class, int...> class TT, long... X>
--+  struct C<TT<int, X...>> {
--+    template <class... Y> using l = A<B<Y, X>...>;
--+  };
--+  template <class> struct D;
--+  template struct C<D<int>>;
--+} // namespace GH162855
--+#endif
 diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl
-index 81b4a54..ed562d4 100644
+index ed562d4..ba4c1a2 100644
 --- a/third_party/llvm/workspace.bzl
 +++ b/third_party/llvm/workspace.bzl
 @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive")
  
  def repo(name):
      """Imports LLVM."""
--    LLVM_COMMIT = "3a6b818132e3133c7d33f8f577e62503f12869b4"
--    LLVM_SHA256 = "a0b3de698393e0f49d0aca3f869cc03bf0c59eba0c65f608e565278943c31958"
-+    LLVM_COMMIT = "267fa8dd1efce0b79ebcaa804d54542c99918df2"
-+    LLVM_SHA256 = "a72180219b02c46a11fa11d7ca3e5c4f57ecaa348162e010e73a59bd26623950"
+-    LLVM_COMMIT = "267fa8dd1efce0b79ebcaa804d54542c99918df2"
+-    LLVM_SHA256 = "a72180219b02c46a11fa11d7ca3e5c4f57ecaa348162e010e73a59bd26623950"
++    LLVM_COMMIT = "bfee9db7857757e63b64fb4d411a264690ff711a"
++    LLVM_SHA256 = "b14cb659a35562d1fccee470d0bba41cf96363e1b576e113a3a795db9ad78e3e"
  
      tf_http_archive(
          name = name,
diff --git a/third_party/xla/third_party/shardy/workspace.bzl b/third_party/xla/third_party/shardy/workspace.bzl
index 65d610ac6a7..dd332adb67a 100644
--- a/third_party/xla/third_party/shardy/workspace.bzl
+++ b/third_party/xla/third_party/shardy/workspace.bzl
@@ -3,8 +3,8 @@
 load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls")
 
 def repo():
-    SHARDY_COMMIT = "5cc8d44a7428d36b18965e65d93287be66ea7faa"
-    SHARDY_SHA256 = "f12259a31d64a5220b48351bf2c2409679cf789ec2bf1b7cda0e5d516e918f97"
+    SHARDY_COMMIT = "8349f4b45638848d28fb7ad366c68d4ed16710ab"
+    SHARDY_SHA256 = "cc7856517bec649903d302496922cef92373bb654c5f53da0014822bbff6835f"
 
     tf_http_archive(
         name = "shardy",