[Submodule] Bump FBGEMM to latest (#165544)
Summary:
* FBGEMM submodule updated to main
* CMake updated to reflect necessary changes
* Notably pulls in NVFP4 grouped gemm kernels

Signed-off-by: Simon Layton <simonlayton@meta.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/165544
Approved by: https://github.com/cyyever, https://github.com/jeffdaily
parent 757975ad50
commit 0b58d87aec
@@ -289,14 +289,15 @@ IF(USE_FBGEMM_GENAI)
 
   set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
-  set(fbgemm_genai_mx8mx8bf16_grouped
-    "${FBGEMM_GENAI_SRCS}/cutlass_extensions/mx8mx8bf16_grouped/"
+  set(fbgemm_genai_cuh
+    "${FBGEMM_GENAI_SRCS}/"
   )
 
   target_include_directories(fbgemm_genai PRIVATE
     ${FBGEMM_THIRD_PARTY}/cutlass/include
     ${FBGEMM_THIRD_PARTY}/cutlass/tools/util/include
-    ${fbgemm_genai_mx8mx8bf16_grouped}
+    ${fbgemm_genai_cuh}
+    ${FBGEMM_GENAI_SRCS}/common/include/  # includes fbgemm_gpu/quantize/utils.h, fbgemm_gpu/quantize/tuning_cache.hpp
     ${FBGEMM_GENAI_SRCS}/include/         # includes fbgemm_gpu/torch_ops.h
   )
 
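The CMake change above swaps the narrow mx8mx8bf16_grouped include directory for the whole FBGEMM GenAI source tree (fbgemm_genai_cuh) and adds common/include/, which carries fbgemm_gpu/quantize/utils.h and tuning_cache.hpp. A minimal Python sketch for sanity-checking that those directories exist in the bumped submodule; the FBGEMM_GENAI_SRCS value here is an assumption inferred from the paths visible in this diff, not read from the CMake file itself.

    # Hypothetical check; FBGEMM_GENAI_SRCS is assumed to be the GenAI quantize source root.
    from pathlib import Path

    FBGEMM_GENAI_SRCS = Path("third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize")

    for subdir in ("", "common/include", "include"):
        path = FBGEMM_GENAI_SRCS / subdir
        print(f"{path}: {'ok' if path.is_dir() else 'missing'}")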
third_party/fbgemm (vendored)
@@ -1 +1 @@
-Subproject commit 3cefe0564a8c3de514a152d40a2b4770f2ee5be0
+Subproject commit c0b988d39a9e47c794d699f29930ed4d7c7e13a4
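This bump moves the third_party/fbgemm gitlink from 3cefe056 to c0b988d3. A minimal sketch, assuming a standard pytorch checkout at this commit, for syncing the local submodule to the recorded pin and confirming the hash:

    # Bring the local fbgemm checkout in line with the gitlink recorded above.
    import subprocess

    subprocess.run(
        ["git", "submodule", "update", "--init", "--recursive", "third_party/fbgemm"],
        check=True,
    )
    # Should print c0b988d39a9e47c794d699f29930ed4d7c7e13a4.
    pin = subprocess.run(
        ["git", "-C", "third_party/fbgemm", "rev-parse", "HEAD"],
        check=True,
        capture_output=True,
        text=True,
    ).stdout.strip()
    print(pin)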
@@ -201,18 +201,63 @@ for hip_platform_file in hip_platform_files:
                 sources.write(line)
         print(f"{hip_platform_file} updated")
 
+# NOTE: fbgemm sources needing hipify
+# fbgemm is its own project with its own build system. pytorch uses fbgemm as
+# a submodule to acquire some gpu source files but compiles only those sources
+# instead of using fbgemm's own build system. One of the source files refers
+# to a header file that is the result of running hipify, but fbgemm uses
+# slightly different hipify settings than pytorch. fbgemm normally hipifies
+# and renames tuning_cache.cuh to tuning_cache_hip.cuh, but pytorch's settings
+# for hipify puts it into its own 'hip' directory. After hipify runs below with
+# the added fbgemm file, we move it to its expected location.
+# NOTE: Internal meta builds (using buck) don't need this step, so conditionally disable it
+buck_build = os.environ.get("FBCODE_BUILD_TOOL", "") == "buck"
+
+extra_files = [
+    "torch/_inductor/codegen/cuda/device_op_overrides.py",
+    "torch/_inductor/codegen/cpp_wrapper_cpu.py",
+    "torch/_inductor/codegen/cpp_wrapper_gpu.py",
+    "torch/_inductor/codegen/wrapper.py",
+]
+
+fbgemm_dir = (
+    REPO_ROOT
+    / "third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/common/include/fbgemm_gpu/quantize"
+)
+
+if not buck_build:
+    fbgemm_original = fbgemm_dir / "tuning_cache.cuh"
+
+    extra_files.append(fbgemm_original.as_posix())
+
 hipify_python.hipify(
     project_directory=proj_dir,
     output_directory=out_dir,
     includes=includes,
     ignores=ignores,
-    extra_files=[
-        "torch/_inductor/codegen/cuda/device_op_overrides.py",
-        "torch/_inductor/codegen/cpp_wrapper_cpu.py",
-        "torch/_inductor/codegen/cpp_wrapper_gpu.py",
-        "torch/_inductor/codegen/wrapper.py",
-    ],
+    extra_files=extra_files,
     out_of_place_only=args.out_of_place_only,
     hip_clang_launch=is_hip_clang(),
 )
 
+if not buck_build:
+    fbgemm_move_src = fbgemm_dir / "hip/tuning_cache.cuh"
+    fbgemm_move_dst = fbgemm_dir / "tuning_cache_hip.cuh"
+
+    # only update the file if it changes or doesn't exist
+    do_write = True
+    src_lines = None
+    with open(fbgemm_move_src) as src:
+        src_lines = src.readlines()
+    if os.path.exists(fbgemm_move_dst):
+        dst_lines = None
+        with open(fbgemm_move_dst) as dst:
+            dst_lines = dst.readlines()
+        if src_lines == dst_lines:
+            print(f"{fbgemm_move_dst} skipped")
+            do_write = False
+    if do_write:
+        with open(fbgemm_move_dst, "w") as dst:
+            for line in src_lines:
+                dst.write(line)
+        print(f"{fbgemm_move_dst} updated")
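The comment block in this hunk explains the mismatch the new code papers over: pytorch's hipify settings write the translated header to a hip/ subdirectory (hip/tuning_cache.cuh), while fbgemm's own sources expect it next to the original as tuning_cache_hip.cuh, so the script copies it across after hipify runs and rewrites the destination only when its contents actually change. A standalone sketch of that copy-if-changed pattern; this helper is hypothetical, and the diff above inlines the same logic rather than defining a function:

    # Hypothetical helper mirroring the inline "write only if changed" logic above.
    from pathlib import Path

    def copy_if_changed(src: Path, dst: Path) -> bool:
        """Copy src to dst, skipping the write when dst already has identical contents."""
        src_text = src.read_text()
        if dst.exists() and dst.read_text() == src_text:
            print(f"{dst} skipped")
            return False
        dst.write_text(src_text)
        print(f"{dst} updated")
        return True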
@@ -639,6 +639,8 @@ def is_pytorch_file(rel_filepath):
         return True
     if rel_filepath.startswith("third_party/nvfuser/"):
         return True
+    if rel_filepath.startswith("third_party/fbgemm/"):
+        return True
     if rel_filepath.startswith("tools/autograd/templates/"):
         return True
     return False
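With the new branch, paths under third_party/fbgemm/ count as PyTorch files, so the fbgemm header passed via extra_files above is actually picked up for translation while other third_party trees stay excluded. A small usage sketch, assuming the function is importable from torch.utils.hipify.hipify_python as in current pytorch:

    # Illustrative only; the expected results follow from the checks shown in the hunk.
    from torch.utils.hipify.hipify_python import is_pytorch_file

    fbgemm_header = (
        "third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/"
        "common/include/fbgemm_gpu/quantize/tuning_cache.cuh"
    )
    print(is_pytorch_file(fbgemm_header))          # True after this change
    print(is_pytorch_file("third_party/eigen/x"))  # False: other third_party paths stay excluded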