diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 86cb3b28b6e..293b752ab72 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -544,6 +544,11 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK) # Disable I8MM For CI since clang 9 does not support neon i8mm. set(XNNPACK_ENABLE_ARM_I8MM OFF CACHE BOOL "") + # Older MSVC versions don't support AVX512FP. TODO Minimum version support? + IF(CMAKE_C_COMPILER_ID STREQUAL "MSVC") + set(XNNPACK_ENABLE_AVX512FP16 OFF CACHE BOOL "") + ENDIF() + # Conditionally disable AVX512AMX, as it requires Clang 11 or later. Note that # XNNPACK does conditionally compile this based on GCC version. Once it also does # so based on Clang version, this logic can be removed. diff --git a/third_party/XNNPACK b/third_party/XNNPACK index fcbf55af6cf..87ee0b46b83 160000 --- a/third_party/XNNPACK +++ b/third_party/XNNPACK @@ -1 +1 @@ -Subproject commit fcbf55af6cf28a4627bcd1f703ab7ad843f0f3a2 +Subproject commit 87ee0b46b834f67bad9025d4a82ed5654f3403d3 diff --git a/third_party/generate-xnnpack-wrappers.py b/third_party/generate-xnnpack-wrappers.py index 34171f67650..e9b23e4a784 100755 --- a/third_party/generate-xnnpack-wrappers.py +++ b/third_party/generate-xnnpack-wrappers.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from __future__ import print_function +from pathlib import Path import collections import os import sys @@ -99,12 +100,24 @@ def handle_singleline_parse(line): return key_val[0], [x[4:] for x in key_val[1:]] def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"): + print(f"Updating sources from {cmakefile}") sources = collections.defaultdict(list) with open(os.path.join(xnnpack_path, cmakefile)) as cmake: lines = cmake.readlines() i = 0 while i < len(lines): line = lines[i] + + if lines[i].startswith("INCLUDE"): + file, _ = handle_singleline_parse(line) + if file.startswith("cmake/gen/"): + path = Path(xnnpack_path) / "XNNPACK" / file + local_sources = update_sources(xnnpack_path, path.absolute().as_posix()) + for k,v in local_sources.items(): + if k in sources: + sources[k] = sources[k] + local_sources[k] + else: + sources[k] = local_sources[k] if lines[i].startswith("SET") and "src/" in lines[i]: name, val = handle_singleline_parse(line) @@ -132,7 +145,7 @@ def gen_wrappers(xnnpack_path): xnnpack_sources = collections.defaultdict(list) sources = update_sources(xnnpack_path) - microkernels_sources = update_sources(xnnpack_path, "XNNPACK/cmake/microkernels.cmake") + microkernels_sources = update_sources(xnnpack_path, "XNNPACK/cmake/gen/microkernels.cmake") for key in microkernels_sources: sources[key] = microkernels_sources[key] @@ -186,6 +199,8 @@ def gen_wrappers(xnnpack_path): def main(argv): + print("Generating wrappers...") + if argv is None or len(argv) == 0: gen_wrappers(".") else: diff --git a/third_party/xnnpack.buck.bzl b/third_party/xnnpack.buck.bzl index cb351261d40..144dc8513ec 100644 --- a/third_party/xnnpack.buck.bzl +++ b/third_party/xnnpack.buck.bzl @@ -4,7 +4,6 @@ load("//tools/build_defs:glob_defs.bzl", "subdir_glob") load("//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "APPLETVOS", "CXX", "IOS", "MACOSX", "WINDOWS") load( ":xnnpack_src_defs.bzl", - "JIT_SRCS", "LOGGING_SRCS", "OPERATOR_SRCS", "SUBGRAPH_SRCS", @@ -108,7 +107,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F preferred_linkage = "static", preprocessor_flags = [ "-DXNN_LOG_LEVEL=0", - "-DXNN_ENABLE_JIT=0", "-DXNN_ENABLE_SPARSE=0", "-DXNN_ENABLE_MEMOPT", ], @@ -154,37 +152,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F ], ) - fb_xplat_cxx_library( - name = "jit_memory", - # srcs have to include HOT_SRCS to be able to build on ARVR - srcs = JIT_SRCS, - headers = subdir_glob([ - ("XNNPACK/src", "**/*.h"), - ]), - header_namespace = "", - apple_sdks = (IOS, MACOSX, APPLETVOS), - compiler_flags = [ - "-Oz", - ], - fbobjc_preprocessor_flags = [ - "-DXNN_PRIVATE=", - "-DXNN_INTERNAL=", - ], - labels = labels, - platforms = (APPLE, ANDROID, CXX, WINDOWS), - preferred_linkage = "static", - preprocessor_flags = [ - "-DXNN_LOG_LEVEL=0", - ], - visibility = ["PUBLIC"], - windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS, - windows_compiler_flags_override = WINDOWS_FLAGS, - deps = [ - ":interface", - third_party("clog"), - ], - ) - fb_xplat_cxx_library( name = "ukernels_scalar", srcs = PROD_SCALAR_MICROKERNEL_SRCS, @@ -792,6 +759,9 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F preprocessor_flags = [ "-DXNN_LOG_LEVEL=0", ], + exported_preprocessor_flags = [ + "-DXNN_ENABLE_AVX512VNNI" + ], visibility = ["PUBLIC"], windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"], windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"], @@ -833,6 +803,9 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F preprocessor_flags = [ "-DXNN_LOG_LEVEL=0", ], + exported_preprocessor_flags = [ + "-DXNN_ENABLE_AVX512VNNI" + ], visibility = ["PUBLIC"], windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"], windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"], @@ -1328,6 +1301,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F "-mf16c", ], windows_compiler_flags_override = WINDOWS_FLAGS + [ + "/D__AVX2__", "-mavx2", "-mfma", "-mf16c", @@ -1576,6 +1550,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F "-mavx512bw", "-mavx512dq", "-mavx512vl", + ], deps = [ ":interface", @@ -1633,6 +1608,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F "-mavx512bw", "-mavx512dq", "-mavx512vl", + "/D__AVX512BW__", ], windows_srcs = PROD_AVX512SKX_MICROKERNEL_SRCS, deps = [ @@ -2463,7 +2439,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F windows_compiler_flags_override = WINDOWS_FLAGS, deps = [ ":interface", - ":jit_memory", third_party("FP16"), ], ) @@ -2507,7 +2482,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F windows_compiler_flags_override = WINDOWS_FLAGS, deps = [ ":interface", - ":jit_memory", third_party("FP16"), ], ) @@ -2519,7 +2493,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F preferred_linkage = "static", visibility = ["PUBLIC"], deps = [ - ":jit_memory", ":ukernels_asm_aarch64", ":ukernels_neon", ":ukernels_neon_aarch64", @@ -2581,10 +2554,13 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F ":ukernels_ssse3_ovr_win32", ":ukernels_xop_ovr_win32", ":ukernels_avx512vbmi", - ":ukernels_avx512vnni_ovr_win32", - ":ukernels_avx512vnnigfni_ovr_win32", + # ":ukernels_avx512vnni_ovr_win32", # Build crashes on Windows Clang 17.0.3, re-enable when fixed (T199959765) + # ":ukernels_avx512vnnigfni_ovr_win32", # ":ukernels_avxvnni_ovr_win32" Excluding avxvnni microkernels because they fail on older compilers ], + exported_preprocessor_flags = [ + "-DXNN_ENABLE_AVX512VNNIGFNI=0" + ] ) fb_xplat_cxx_library( @@ -2594,7 +2570,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F preferred_linkage = "static", visibility = ["PUBLIC"], deps = [ - ":jit_memory", ":ukernels_armsimd32", ":ukernels_asm_aarch32", ":ukernels_asm_aarch64", @@ -2622,7 +2597,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F preferred_linkage = "static", visibility = ["PUBLIC"], deps = [ - ":jit_memory", ":ukernels_asm_aarch32", ":ukernels_neon", ":ukernels_neon_dot", @@ -2690,7 +2664,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F "-DXNN_NO_X8_OPERATORS", "-DXNN_ENABLE_MEMOPT", "-DXNN_ENABLE_SPARSE=0", - "-DXNN_ENABLE_JIT=0", "-DXNN_ENABLE_ASSEMBLY", "-DXNN_ENABLE_GEMM_M_SPECIALIZATION", "-DXNN_ENABLE_ARM_DOTPROD", @@ -2712,7 +2685,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F "XNNPACK/src/memory.c", "XNNPACK/src/mutex.c", "XNNPACK/src/microparams-init.c", - "XNNPACK/src/operators/post-operation.c", ], visibility = ["PUBLIC"], windows_clang_compiler_flags_override = (WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS) if XNNPACK_WINDOWS_AVX512F_ENABLED else WINDOWS_FLAGS, diff --git a/third_party/xnnpack_src_defs.bzl b/third_party/xnnpack_src_defs.bzl index 296dacb58ec..e9b3b6f9a9c 100644 --- a/third_party/xnnpack_src_defs.bzl +++ b/third_party/xnnpack_src_defs.bzl @@ -2,16 +2,12 @@ Auto-generated by generate-wrappers.py script. Do not modify """ -PROD_SCALAR_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/scalar.c", +PROD_ARMSIMD32_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/armsimd32.c", ] -PROD_AVX512VNNI_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/avx512vnni.c", -] - -PROD_AVX512F_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/avx512f.c", +PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/neonfp16arith-aarch64.c", ] AARCH64_ASM_MICROKERNEL_SRCS = [ @@ -240,41 +236,237 @@ AARCH64_ASM_MICROKERNEL_SRCS = [ "XNNPACK/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S", "XNNPACK/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S", "XNNPACK/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S", - "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S", - "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S", "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S", "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S", "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S", "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S", "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S", "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S", - "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S", - "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S", - "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S", - "XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S", - "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S", - "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S", "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S", "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S", "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S", "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S", "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S", "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S", - "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S", - "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S", - "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S", - "XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S", +] + +PROD_AVXVNNI_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/avxvnni.c", +] + +SUBGRAPH_SRCS = [ + "XNNPACK/src/memory-planner.c", + "XNNPACK/src/runtime.c", + "XNNPACK/src/subgraph.c", + "XNNPACK/src/subgraph/abs.c", + "XNNPACK/src/subgraph/add2.c", + "XNNPACK/src/subgraph/argmax-pooling-2d.c", + "XNNPACK/src/subgraph/average-pooling-2d.c", + "XNNPACK/src/subgraph/bankers-rounding.c", + "XNNPACK/src/subgraph/batch-matrix-multiply.c", + "XNNPACK/src/subgraph/ceiling.c", + "XNNPACK/src/subgraph/clamp.c", + "XNNPACK/src/subgraph/concatenate.c", + "XNNPACK/src/subgraph/convert.c", + "XNNPACK/src/subgraph/convolution-2d.c", + "XNNPACK/src/subgraph/copy.c", + "XNNPACK/src/subgraph/copysign.c", + "XNNPACK/src/subgraph/deconvolution-2d.c", + "XNNPACK/src/subgraph/depth-to-space-2d.c", + "XNNPACK/src/subgraph/depthwise-convolution-2d.c", + "XNNPACK/src/subgraph/divide.c", + "XNNPACK/src/subgraph/elu.c", + "XNNPACK/src/subgraph/even-split.c", + "XNNPACK/src/subgraph/exp.c", + "XNNPACK/src/subgraph/floor.c", + "XNNPACK/src/subgraph/fully-connected-sparse.c", + "XNNPACK/src/subgraph/fully-connected.c", + "XNNPACK/src/subgraph/gelu.c", + "XNNPACK/src/subgraph/global-average-pooling.c", + "XNNPACK/src/subgraph/global-sum-pooling.c", + "XNNPACK/src/subgraph/hardswish.c", + "XNNPACK/src/subgraph/leaky-relu.c", + "XNNPACK/src/subgraph/log.c", + "XNNPACK/src/subgraph/max-pooling-2d.c", + "XNNPACK/src/subgraph/maximum2.c", + "XNNPACK/src/subgraph/minimum2.c", + "XNNPACK/src/subgraph/multiply2.c", + "XNNPACK/src/subgraph/negate.c", + "XNNPACK/src/subgraph/prelu.c", + "XNNPACK/src/subgraph/reciprocal-square-root.c", + "XNNPACK/src/subgraph/reshape-helpers.c", + "XNNPACK/src/subgraph/scaled-dot-product-attention.c", + "XNNPACK/src/subgraph/sigmoid.c", + "XNNPACK/src/subgraph/softmax.c", + "XNNPACK/src/subgraph/space-to-depth-2d.c", + "XNNPACK/src/subgraph/square-root.c", + "XNNPACK/src/subgraph/square.c", + "XNNPACK/src/subgraph/squared-difference.c", + "XNNPACK/src/subgraph/static-constant-pad.c", + "XNNPACK/src/subgraph/static-mean.c", + "XNNPACK/src/subgraph/static-reshape.c", + "XNNPACK/src/subgraph/static-resize-bilinear-2d.c", + "XNNPACK/src/subgraph/static-slice.c", + "XNNPACK/src/subgraph/static-transpose.c", + "XNNPACK/src/subgraph/subtract.c", + "XNNPACK/src/subgraph/tanh.c", + "XNNPACK/src/subgraph/unpooling-2d.c", + "XNNPACK/src/subgraph/validation.c", + "XNNPACK/src/tensor.c", +] + +PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/avx512vnnigfni.c", +] + +PROD_AVX512VNNI_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/avx512vnni.c", +] + +PROD_SSE2_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/sse2.c", +] + +PROD_NEONDOT_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/neondot.c", +] + +PROD_SSE41_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/sse41.c", +] + +PROD_SSE_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/sse.c", +] + +PROD_NEONFP16ARITH_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/neonfp16arith.c", ] PROD_NEONV8_MICROKERNEL_SRCS = [ "XNNPACK/src/amalgam/gen/neonv8.c", ] +PROD_NEONFP16_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/neonfp16.c", +] + +XNNPACK_SRCS = [ + "XNNPACK/src/configs/argmaxpool-config.c", + "XNNPACK/src/configs/avgpool-config.c", + "XNNPACK/src/configs/binary-elementwise-config.c", + "XNNPACK/src/configs/cmul-config.c", + "XNNPACK/src/configs/conv-hwc2chw-config.c", + "XNNPACK/src/configs/dwconv-config.c", + "XNNPACK/src/configs/dwconv2d-chw-config.c", + "XNNPACK/src/configs/experiments-config.c", + "XNNPACK/src/configs/gavgpool-config.c", + "XNNPACK/src/configs/gavgpool-cw-config.c", + "XNNPACK/src/configs/gemm-config.c", + "XNNPACK/src/configs/ibilinear-chw-config.c", + "XNNPACK/src/configs/ibilinear-config.c", + "XNNPACK/src/configs/lut32norm-config.c", + "XNNPACK/src/configs/maxpool-config.c", + "XNNPACK/src/configs/pavgpool-config.c", + "XNNPACK/src/configs/prelu-config.c", + "XNNPACK/src/configs/raddstoreexpminusmax-config.c", + "XNNPACK/src/configs/reduce-config.c", + "XNNPACK/src/configs/rmax-config.c", + "XNNPACK/src/configs/spmm-config.c", + "XNNPACK/src/configs/transpose-config.c", + "XNNPACK/src/configs/unary-elementwise-config.c", + "XNNPACK/src/configs/unpool-config.c", + "XNNPACK/src/configs/vmulcaddc-config.c", + "XNNPACK/src/configs/xx-fill-config.c", + "XNNPACK/src/configs/xx-pad-config.c", + "XNNPACK/src/configs/x8-lut-config.c", + "XNNPACK/src/configs/zip-config.c", + "XNNPACK/src/init.c", + "XNNPACK/src/params.c", +] + PROD_AVX_MICROKERNEL_SRCS = [ "XNNPACK/src/amalgam/gen/avx.c", ] +PROD_AVX512SKX_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/avx512skx.c", +] + +PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/neondotfp16-aarch64.c", +] + +PROD_FP16ARITH_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/fp16arith.c", +] + +PROD_FMA_MICROKERNEL_SRCS = [ +] + +OPERATOR_SRCS = [ + "XNNPACK/src/operator-delete.c", + "XNNPACK/src/operators/argmax-pooling-nhwc.c", + "XNNPACK/src/operators/average-pooling-nhwc.c", + "XNNPACK/src/operators/batch-matrix-multiply-nc.c", + "XNNPACK/src/operators/binary-elementwise-nd.c", + "XNNPACK/src/operators/channel-shuffle-nc.c", + "XNNPACK/src/operators/constant-pad-nd.c", + "XNNPACK/src/operators/convolution-nchw.c", + "XNNPACK/src/operators/convolution-nhwc.c", + "XNNPACK/src/operators/deconvolution-nhwc.c", + "XNNPACK/src/operators/dynamic-fully-connected-nc.c", + "XNNPACK/src/operators/fully-connected-nc.c", + "XNNPACK/src/operators/global-average-pooling-ncw.c", + "XNNPACK/src/operators/global-average-pooling-nwc.c", + "XNNPACK/src/operators/lut-elementwise-nc.c", + "XNNPACK/src/operators/max-pooling-nhwc.c", + "XNNPACK/src/operators/prelu-nc.c", + "XNNPACK/src/operators/reduce-nd.c", + "XNNPACK/src/operators/resize-bilinear-nchw.c", + "XNNPACK/src/operators/resize-bilinear-nhwc.c", + "XNNPACK/src/operators/rope-nthc.c", + "XNNPACK/src/operators/scaled-dot-product-attention-nhtc.c", + "XNNPACK/src/operators/slice-nd.c", + "XNNPACK/src/operators/softmax-nc.c", + "XNNPACK/src/operators/transpose-nd.c", + "XNNPACK/src/operators/unary-elementwise-nc.c", + "XNNPACK/src/operators/unpooling-nhwc.c", +] + +PROD_NEONI8MM_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/neoni8mm.c", +] + +PROD_AVX512F_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/avx512f.c", +] + +JIT_SRCS = [ +] + +PROD_F16C_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/f16c.c", +] + +PROD_NEON_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/neon.c", +] + +PROD_SCALAR_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/scalar.c", +] + +PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/neondot-aarch64.c", +] + +PROD_FMA3_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/fma3.c", +] + LOGGING_SRCS = [ + "XNNPACK/src/enums/allocation-type.c", "XNNPACK/src/enums/datatype-strings.c", "XNNPACK/src/enums/microkernel-type.c", "XNNPACK/src/enums/node-type.c", @@ -282,8 +474,27 @@ LOGGING_SRCS = [ "XNNPACK/src/log.c", ] -PROD_NEONI8MM_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/neoni8mm.c", +PROD_NEONFMA_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/neonfma.c", +] + +PROD_AVX2_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/avx2.c", +] + +PROD_AVX512VBMI_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/avx512vbmi.c", +] + +PROD_RVV_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/rvv.c", +] + +PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS = [ + "XNNPACK/src/amalgam/gen/neondotfp16arith.c", +] + +PROD_XOP_MICROKERNEL_SRCS = [ ] AARCH32_ASM_MICROKERNEL_SRCS = [ @@ -376,134 +587,10 @@ AARCH32_ASM_MICROKERNEL_SRCS = [ "XNNPACK/src/u32-filterbank-accumulate/u32-filterbank-accumulate-asm-aarch32-neon-x2.S", ] -PROD_F16C_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/f16c.c", -] - -PROD_XOP_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/xop.c", -] - -PROD_RVV_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/rvv.c", -] - -SUBGRAPH_SRCS = [ - "XNNPACK/src/memory-planner.c", - "XNNPACK/src/runtime.c", - "XNNPACK/src/subgraph.c", - "XNNPACK/src/subgraph/abs.c", - "XNNPACK/src/subgraph/add2.c", - "XNNPACK/src/subgraph/argmax-pooling-2d.c", - "XNNPACK/src/subgraph/average-pooling-2d.c", - "XNNPACK/src/subgraph/bankers-rounding.c", - "XNNPACK/src/subgraph/batch-matrix-multiply.c", - "XNNPACK/src/subgraph/ceiling.c", - "XNNPACK/src/subgraph/clamp.c", - "XNNPACK/src/subgraph/concatenate.c", - "XNNPACK/src/subgraph/convert.c", - "XNNPACK/src/subgraph/convolution-2d.c", - "XNNPACK/src/subgraph/copy.c", - "XNNPACK/src/subgraph/deconvolution-2d.c", - "XNNPACK/src/subgraph/depth-to-space-2d.c", - "XNNPACK/src/subgraph/depthwise-convolution-2d.c", - "XNNPACK/src/subgraph/divide.c", - "XNNPACK/src/subgraph/elu.c", - "XNNPACK/src/subgraph/even-split.c", - "XNNPACK/src/subgraph/floor.c", - "XNNPACK/src/subgraph/fully-connected-sparse.c", - "XNNPACK/src/subgraph/fully-connected.c", - "XNNPACK/src/subgraph/global-average-pooling.c", - "XNNPACK/src/subgraph/global-sum-pooling.c", - "XNNPACK/src/subgraph/hardswish.c", - "XNNPACK/src/subgraph/leaky-relu.c", - "XNNPACK/src/subgraph/max-pooling-2d.c", - "XNNPACK/src/subgraph/maximum2.c", - "XNNPACK/src/subgraph/minimum2.c", - "XNNPACK/src/subgraph/multiply2.c", - "XNNPACK/src/subgraph/negate.c", - "XNNPACK/src/subgraph/prelu.c", - "XNNPACK/src/subgraph/reshape-helpers.c", - "XNNPACK/src/subgraph/scaled-dot-product-attention.c", - "XNNPACK/src/subgraph/sigmoid.c", - "XNNPACK/src/subgraph/softmax.c", - "XNNPACK/src/subgraph/space-to-depth-2d.c", - "XNNPACK/src/subgraph/square-root.c", - "XNNPACK/src/subgraph/square.c", - "XNNPACK/src/subgraph/squared-difference.c", - "XNNPACK/src/subgraph/static-constant-pad.c", - "XNNPACK/src/subgraph/static-mean.c", - "XNNPACK/src/subgraph/static-reshape.c", - "XNNPACK/src/subgraph/static-resize-bilinear-2d.c", - "XNNPACK/src/subgraph/static-slice.c", - "XNNPACK/src/subgraph/static-transpose.c", - "XNNPACK/src/subgraph/subtract.c", - "XNNPACK/src/subgraph/tanh.c", - "XNNPACK/src/subgraph/unpooling-2d.c", - "XNNPACK/src/subgraph/validation.c", - "XNNPACK/src/tensor.c", -] - -PROD_FMA3_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/fma3.c", -] - -PROD_AVX512SKX_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/avx512skx.c", -] - -JIT_SRCS = [ - "XNNPACK/src/jit/aarch32-assembler.cc", - "XNNPACK/src/jit/aarch64-assembler.cc", - "XNNPACK/src/jit/assembler.cc", -] - -PROD_NEONFP16_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/neonfp16.c", -] - PROD_SSSE3_MICROKERNEL_SRCS = [ "XNNPACK/src/amalgam/gen/ssse3.c", ] -XNNPACK_SRCS = [ - "XNNPACK/src/configs/argmaxpool-config.c", - "XNNPACK/src/configs/avgpool-config.c", - "XNNPACK/src/configs/binary-elementwise-config.c", - "XNNPACK/src/configs/cmul-config.c", - "XNNPACK/src/configs/conv-hwc2chw-config.c", - "XNNPACK/src/configs/dwconv-config.c", - "XNNPACK/src/configs/dwconv2d-chw-config.c", - "XNNPACK/src/configs/experiments-config.c", - "XNNPACK/src/configs/gavgpool-config.c", - "XNNPACK/src/configs/gavgpool-cw-config.c", - "XNNPACK/src/configs/gemm-config.c", - "XNNPACK/src/configs/ibilinear-chw-config.c", - "XNNPACK/src/configs/ibilinear-config.c", - "XNNPACK/src/configs/lut32norm-config.c", - "XNNPACK/src/configs/maxpool-config.c", - "XNNPACK/src/configs/pavgpool-config.c", - "XNNPACK/src/configs/prelu-config.c", - "XNNPACK/src/configs/raddstoreexpminusmax-config.c", - "XNNPACK/src/configs/reduce-config.c", - "XNNPACK/src/configs/rmax-config.c", - "XNNPACK/src/configs/spmm-config.c", - "XNNPACK/src/configs/transpose-config.c", - "XNNPACK/src/configs/unary-elementwise-config.c", - "XNNPACK/src/configs/unpool-config.c", - "XNNPACK/src/configs/vmulcaddc-config.c", - "XNNPACK/src/configs/xx-fill-config.c", - "XNNPACK/src/configs/xx-pad-config.c", - "XNNPACK/src/configs/x8-lut-config.c", - "XNNPACK/src/configs/zip-config.c", - "XNNPACK/src/init.c", - "XNNPACK/src/params.c", -] - -PROD_FP16ARITH_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/fp16arith.c", -] - TABLE_SRCS = [ "XNNPACK/src/tables/exp2-k-over-64.c", "XNNPACK/src/tables/exp2-k-over-2048.c", @@ -516,105 +603,7 @@ TABLE_SRCS = [ "XNNPACK/src/tables/vlog.c", ] -PROD_NEON_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/neon.c", -] - -PROD_AVXVNNI_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/avxvnni.c", -] - -PROD_NEONFP16ARITH_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/neonfp16arith.c", -] - -PROD_SSE_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/sse.c", -] - PROD_NEON_AARCH64_MICROKERNEL_SRCS = [ "XNNPACK/src/amalgam/gen/neon-aarch64.c", "XNNPACK/src/amalgam/gen/neonfma-aarch64.c", ] - -PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/neondotfp16-aarch64.c", -] - -PROD_NEONFMA_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/neonfma.c", -] - -PROD_FMA_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/fma.c", -] - -PROD_SSE2_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/sse2.c", -] - -PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/avx512vnnigfni.c", -] - -PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/neonfp16arith-aarch64.c", -] - -PROD_AVX2_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/avx2.c", -] - -OPERATOR_SRCS = [ - "XNNPACK/src/operator-delete.c", - "XNNPACK/src/operators/argmax-pooling-nhwc.c", - "XNNPACK/src/operators/average-pooling-nhwc.c", - "XNNPACK/src/operators/batch-matrix-multiply-nc.c", - "XNNPACK/src/operators/binary-elementwise-nd.c", - "XNNPACK/src/operators/channel-shuffle-nc.c", - "XNNPACK/src/operators/constant-pad-nd.c", - "XNNPACK/src/operators/convolution-nchw.c", - "XNNPACK/src/operators/convolution-nhwc.c", - "XNNPACK/src/operators/deconvolution-nhwc.c", - "XNNPACK/src/operators/dynamic-fully-connected-nc.c", - "XNNPACK/src/operators/fully-connected-nc.c", - "XNNPACK/src/operators/global-average-pooling-ncw.c", - "XNNPACK/src/operators/global-average-pooling-nwc.c", - "XNNPACK/src/operators/lut-elementwise-nc.c", - "XNNPACK/src/operators/max-pooling-nhwc.c", - "XNNPACK/src/operators/prelu-nc.c", - "XNNPACK/src/operators/reduce-nd.c", - "XNNPACK/src/operators/resize-bilinear-nchw.c", - "XNNPACK/src/operators/resize-bilinear-nhwc.c", - "XNNPACK/src/operators/rope-nthc.c", - "XNNPACK/src/operators/scaled-dot-product-attention-nhtc.c", - "XNNPACK/src/operators/slice-nd.c", - "XNNPACK/src/operators/softmax-nc.c", - "XNNPACK/src/operators/transpose-nd.c", - "XNNPACK/src/operators/unary-elementwise-nc.c", - "XNNPACK/src/operators/unpooling-nhwc.c", -] - -PROD_AVX512VBMI_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/avx512vbmi.c", -] - -PROD_NEONDOT_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/neondot.c", -] - -PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/neondot-aarch64.c", -] - -PROD_SSE41_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/sse41.c", -] - -PROD_ARMSIMD32_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/armsimd32.c", -] - -PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS = [ - "XNNPACK/src/amalgam/gen/neondotfp16arith.c", -] diff --git a/third_party/xnnpack_wrapper_defs.bzl b/third_party/xnnpack_wrapper_defs.bzl index b92ebb88d74..b05cdcd5cde 100644 --- a/third_party/xnnpack_wrapper_defs.bzl +++ b/third_party/xnnpack_wrapper_defs.bzl @@ -7,7 +7,6 @@ PROD_SCALAR_MICROKERNEL_SRCS = [ ] PROD_FMA_MICROKERNEL_SRCS = [ - "xnnpack_wrappers/amalgam/gen/fma.c", ] PROD_ARMSIMD32_MICROKERNEL_SRCS = [ @@ -92,7 +91,6 @@ PROD_F16C_MICROKERNEL_SRCS = [ ] PROD_XOP_MICROKERNEL_SRCS = [ - "xnnpack_wrappers/amalgam/gen/xop.c", ] PROD_FMA3_MICROKERNEL_SRCS = [ @@ -447,28 +445,16 @@ AARCH64_ASM_MICROKERNEL_SRCS = [ "xnnpack_wrappers/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S", "xnnpack_wrappers/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S", "xnnpack_wrappers/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S", - "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S", - "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S", "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S", "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S", "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S", "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S", "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S", "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S", - "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S", - "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S", - "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S", - "xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S", - "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S", - "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S", "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S", "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S", "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S", "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S", "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S", "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S", - "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S", - "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S", - "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S", - "xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S", ]