Update PyTorch for XNNPACK 87ee0b4 (#134518)

Summary: Update XNNPACK library version.

Test Plan: Combined diff CI is clean: D61586079 (all changes, has to be split out for export).

Differential Revision: D61822610

Pull Request resolved: https://github.com/pytorch/pytorch/pull/134518
Approved by: https://github.com/mcr229
This commit is contained in:
Gregory Comer 2024-08-28 19:24:02 +00:00 committed by PyTorch MergeBot
parent 042b733ddd
commit 3b40b07efb
6 changed files with 269 additions and 302 deletions

View File

@ -544,6 +544,11 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
# Disable I8MM for CI since Clang 9 does not support NEON I8MM.
set(XNNPACK_ENABLE_ARM_I8MM OFF CACHE BOOL "")
# Older MSVC versions don't support AVX512FP16. TODO: determine the minimum MSVC version that does.
IF(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
set(XNNPACK_ENABLE_AVX512FP16 OFF CACHE BOOL "")
ENDIF()
# Conditionally disable AVX512AMX, as it requires Clang 11 or later. Note that
# XNNPACK does conditionally compile this based on GCC version. Once it also does
# so based on Clang version, this logic can be removed.

2
third_party/XNNPACK vendored

@ -1 +1 @@
Subproject commit fcbf55af6cf28a4627bcd1f703ab7ad843f0f3a2
Subproject commit 87ee0b46b834f67bad9025d4a82ed5654f3403d3

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
from __future__ import print_function
from pathlib import Path
import collections
import os
import sys
@ -99,6 +100,7 @@ def handle_singleline_parse(line):
return key_val[0], [x[4:] for x in key_val[1:]]
def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"):
print(f"Updating sources from {cmakefile}")
sources = collections.defaultdict(list)
with open(os.path.join(xnnpack_path, cmakefile)) as cmake:
lines = cmake.readlines()
@ -106,6 +108,17 @@ def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"):
while i < len(lines):
line = lines[i]
if lines[i].startswith("INCLUDE"):
file, _ = handle_singleline_parse(line)
if file.startswith("cmake/gen/"):
path = Path(xnnpack_path) / "XNNPACK" / file
local_sources = update_sources(xnnpack_path, path.absolute().as_posix())
for k,v in local_sources.items():
if k in sources:
sources[k] = sources[k] + local_sources[k]
else:
sources[k] = local_sources[k]
if lines[i].startswith("SET") and "src/" in lines[i]:
name, val = handle_singleline_parse(line)
sources[name].extend(val)
@ -132,7 +145,7 @@ def gen_wrappers(xnnpack_path):
xnnpack_sources = collections.defaultdict(list)
sources = update_sources(xnnpack_path)
microkernels_sources = update_sources(xnnpack_path, "XNNPACK/cmake/microkernels.cmake")
microkernels_sources = update_sources(xnnpack_path, "XNNPACK/cmake/gen/microkernels.cmake")
for key in microkernels_sources:
sources[key] = microkernels_sources[key]
@ -186,6 +199,8 @@ def gen_wrappers(xnnpack_path):
def main(argv):
print("Generating wrappers...")
if argv is None or len(argv) == 0:
gen_wrappers(".")
else:

View File

@ -4,7 +4,6 @@ load("//tools/build_defs:glob_defs.bzl", "subdir_glob")
load("//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "APPLETVOS", "CXX", "IOS", "MACOSX", "WINDOWS")
load(
":xnnpack_src_defs.bzl",
"JIT_SRCS",
"LOGGING_SRCS",
"OPERATOR_SRCS",
"SUBGRAPH_SRCS",
@ -108,7 +107,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
preferred_linkage = "static",
preprocessor_flags = [
"-DXNN_LOG_LEVEL=0",
"-DXNN_ENABLE_JIT=0",
"-DXNN_ENABLE_SPARSE=0",
"-DXNN_ENABLE_MEMOPT",
],
@ -154,37 +152,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
],
)
fb_xplat_cxx_library(
name = "jit_memory",
# srcs have to include HOT_SRCS to be able to build on ARVR
srcs = JIT_SRCS,
headers = subdir_glob([
("XNNPACK/src", "**/*.h"),
]),
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-Oz",
],
fbobjc_preprocessor_flags = [
"-DXNN_PRIVATE=",
"-DXNN_INTERNAL=",
],
labels = labels,
platforms = (APPLE, ANDROID, CXX, WINDOWS),
preferred_linkage = "static",
preprocessor_flags = [
"-DXNN_LOG_LEVEL=0",
],
visibility = ["PUBLIC"],
windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
windows_compiler_flags_override = WINDOWS_FLAGS,
deps = [
":interface",
third_party("clog"),
],
)
fb_xplat_cxx_library(
name = "ukernels_scalar",
srcs = PROD_SCALAR_MICROKERNEL_SRCS,
@ -792,6 +759,9 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
preprocessor_flags = [
"-DXNN_LOG_LEVEL=0",
],
exported_preprocessor_flags = [
"-DXNN_ENABLE_AVX512VNNI"
],
visibility = ["PUBLIC"],
windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
@ -833,6 +803,9 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
preprocessor_flags = [
"-DXNN_LOG_LEVEL=0",
],
exported_preprocessor_flags = [
"-DXNN_ENABLE_AVX512VNNI"
],
visibility = ["PUBLIC"],
windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
@ -1328,6 +1301,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
"-mf16c",
],
windows_compiler_flags_override = WINDOWS_FLAGS + [
"/D__AVX2__",
"-mavx2",
"-mfma",
"-mf16c",
@ -1576,6 +1550,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
"-mavx512bw",
"-mavx512dq",
"-mavx512vl",
],
deps = [
":interface",
@ -1633,6 +1608,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
"-mavx512bw",
"-mavx512dq",
"-mavx512vl",
"/D__AVX512BW__",
],
windows_srcs = PROD_AVX512SKX_MICROKERNEL_SRCS,
deps = [
@ -2463,7 +2439,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
windows_compiler_flags_override = WINDOWS_FLAGS,
deps = [
":interface",
":jit_memory",
third_party("FP16"),
],
)
@ -2507,7 +2482,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
windows_compiler_flags_override = WINDOWS_FLAGS,
deps = [
":interface",
":jit_memory",
third_party("FP16"),
],
)
@ -2519,7 +2493,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
preferred_linkage = "static",
visibility = ["PUBLIC"],
deps = [
":jit_memory",
":ukernels_asm_aarch64",
":ukernels_neon",
":ukernels_neon_aarch64",
@ -2581,10 +2554,13 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
":ukernels_ssse3_ovr_win32",
":ukernels_xop_ovr_win32",
":ukernels_avx512vbmi",
":ukernels_avx512vnni_ovr_win32",
":ukernels_avx512vnnigfni_ovr_win32",
# ":ukernels_avx512vnni_ovr_win32", # Build crashes on Windows Clang 17.0.3, re-enable when fixed (T199959765)
# ":ukernels_avx512vnnigfni_ovr_win32",
# ":ukernels_avxvnni_ovr_win32" Excluding avxvnni microkernels because they fail on older compilers
],
exported_preprocessor_flags = [
"-DXNN_ENABLE_AVX512VNNIGFNI=0"
]
)
fb_xplat_cxx_library(
@ -2594,7 +2570,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
preferred_linkage = "static",
visibility = ["PUBLIC"],
deps = [
":jit_memory",
":ukernels_armsimd32",
":ukernels_asm_aarch32",
":ukernels_asm_aarch64",
@ -2622,7 +2597,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
preferred_linkage = "static",
visibility = ["PUBLIC"],
deps = [
":jit_memory",
":ukernels_asm_aarch32",
":ukernels_neon",
":ukernels_neon_dot",
@ -2690,7 +2664,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
"-DXNN_NO_X8_OPERATORS",
"-DXNN_ENABLE_MEMOPT",
"-DXNN_ENABLE_SPARSE=0",
"-DXNN_ENABLE_JIT=0",
"-DXNN_ENABLE_ASSEMBLY",
"-DXNN_ENABLE_GEMM_M_SPECIALIZATION",
"-DXNN_ENABLE_ARM_DOTPROD",
@ -2712,7 +2685,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
"XNNPACK/src/memory.c",
"XNNPACK/src/mutex.c",
"XNNPACK/src/microparams-init.c",
"XNNPACK/src/operators/post-operation.c",
],
visibility = ["PUBLIC"],
windows_clang_compiler_flags_override = (WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS) if XNNPACK_WINDOWS_AVX512F_ENABLED else WINDOWS_FLAGS,

View File

@ -2,16 +2,12 @@
Auto-generated by generate-wrappers.py script. Do not modify
"""
PROD_SCALAR_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/scalar.c",
PROD_ARMSIMD32_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/armsimd32.c",
]
PROD_AVX512VNNI_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx512vnni.c",
]
PROD_AVX512F_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx512f.c",
PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neonfp16arith-aarch64.c",
]
AARCH64_ASM_MICROKERNEL_SRCS = [
@ -240,41 +236,237 @@ AARCH64_ASM_MICROKERNEL_SRCS = [
"XNNPACK/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
"XNNPACK/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S",
"XNNPACK/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
]
PROD_AVXVNNI_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avxvnni.c",
]
SUBGRAPH_SRCS = [
"XNNPACK/src/memory-planner.c",
"XNNPACK/src/runtime.c",
"XNNPACK/src/subgraph.c",
"XNNPACK/src/subgraph/abs.c",
"XNNPACK/src/subgraph/add2.c",
"XNNPACK/src/subgraph/argmax-pooling-2d.c",
"XNNPACK/src/subgraph/average-pooling-2d.c",
"XNNPACK/src/subgraph/bankers-rounding.c",
"XNNPACK/src/subgraph/batch-matrix-multiply.c",
"XNNPACK/src/subgraph/ceiling.c",
"XNNPACK/src/subgraph/clamp.c",
"XNNPACK/src/subgraph/concatenate.c",
"XNNPACK/src/subgraph/convert.c",
"XNNPACK/src/subgraph/convolution-2d.c",
"XNNPACK/src/subgraph/copy.c",
"XNNPACK/src/subgraph/copysign.c",
"XNNPACK/src/subgraph/deconvolution-2d.c",
"XNNPACK/src/subgraph/depth-to-space-2d.c",
"XNNPACK/src/subgraph/depthwise-convolution-2d.c",
"XNNPACK/src/subgraph/divide.c",
"XNNPACK/src/subgraph/elu.c",
"XNNPACK/src/subgraph/even-split.c",
"XNNPACK/src/subgraph/exp.c",
"XNNPACK/src/subgraph/floor.c",
"XNNPACK/src/subgraph/fully-connected-sparse.c",
"XNNPACK/src/subgraph/fully-connected.c",
"XNNPACK/src/subgraph/gelu.c",
"XNNPACK/src/subgraph/global-average-pooling.c",
"XNNPACK/src/subgraph/global-sum-pooling.c",
"XNNPACK/src/subgraph/hardswish.c",
"XNNPACK/src/subgraph/leaky-relu.c",
"XNNPACK/src/subgraph/log.c",
"XNNPACK/src/subgraph/max-pooling-2d.c",
"XNNPACK/src/subgraph/maximum2.c",
"XNNPACK/src/subgraph/minimum2.c",
"XNNPACK/src/subgraph/multiply2.c",
"XNNPACK/src/subgraph/negate.c",
"XNNPACK/src/subgraph/prelu.c",
"XNNPACK/src/subgraph/reciprocal-square-root.c",
"XNNPACK/src/subgraph/reshape-helpers.c",
"XNNPACK/src/subgraph/scaled-dot-product-attention.c",
"XNNPACK/src/subgraph/sigmoid.c",
"XNNPACK/src/subgraph/softmax.c",
"XNNPACK/src/subgraph/space-to-depth-2d.c",
"XNNPACK/src/subgraph/square-root.c",
"XNNPACK/src/subgraph/square.c",
"XNNPACK/src/subgraph/squared-difference.c",
"XNNPACK/src/subgraph/static-constant-pad.c",
"XNNPACK/src/subgraph/static-mean.c",
"XNNPACK/src/subgraph/static-reshape.c",
"XNNPACK/src/subgraph/static-resize-bilinear-2d.c",
"XNNPACK/src/subgraph/static-slice.c",
"XNNPACK/src/subgraph/static-transpose.c",
"XNNPACK/src/subgraph/subtract.c",
"XNNPACK/src/subgraph/tanh.c",
"XNNPACK/src/subgraph/unpooling-2d.c",
"XNNPACK/src/subgraph/validation.c",
"XNNPACK/src/tensor.c",
]
PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx512vnnigfni.c",
]
PROD_AVX512VNNI_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx512vnni.c",
]
PROD_SSE2_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/sse2.c",
]
PROD_NEONDOT_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neondot.c",
]
PROD_SSE41_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/sse41.c",
]
PROD_SSE_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/sse.c",
]
PROD_NEONFP16ARITH_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neonfp16arith.c",
]
PROD_NEONV8_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neonv8.c",
]
PROD_NEONFP16_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neonfp16.c",
]
XNNPACK_SRCS = [
"XNNPACK/src/configs/argmaxpool-config.c",
"XNNPACK/src/configs/avgpool-config.c",
"XNNPACK/src/configs/binary-elementwise-config.c",
"XNNPACK/src/configs/cmul-config.c",
"XNNPACK/src/configs/conv-hwc2chw-config.c",
"XNNPACK/src/configs/dwconv-config.c",
"XNNPACK/src/configs/dwconv2d-chw-config.c",
"XNNPACK/src/configs/experiments-config.c",
"XNNPACK/src/configs/gavgpool-config.c",
"XNNPACK/src/configs/gavgpool-cw-config.c",
"XNNPACK/src/configs/gemm-config.c",
"XNNPACK/src/configs/ibilinear-chw-config.c",
"XNNPACK/src/configs/ibilinear-config.c",
"XNNPACK/src/configs/lut32norm-config.c",
"XNNPACK/src/configs/maxpool-config.c",
"XNNPACK/src/configs/pavgpool-config.c",
"XNNPACK/src/configs/prelu-config.c",
"XNNPACK/src/configs/raddstoreexpminusmax-config.c",
"XNNPACK/src/configs/reduce-config.c",
"XNNPACK/src/configs/rmax-config.c",
"XNNPACK/src/configs/spmm-config.c",
"XNNPACK/src/configs/transpose-config.c",
"XNNPACK/src/configs/unary-elementwise-config.c",
"XNNPACK/src/configs/unpool-config.c",
"XNNPACK/src/configs/vmulcaddc-config.c",
"XNNPACK/src/configs/xx-fill-config.c",
"XNNPACK/src/configs/xx-pad-config.c",
"XNNPACK/src/configs/x8-lut-config.c",
"XNNPACK/src/configs/zip-config.c",
"XNNPACK/src/init.c",
"XNNPACK/src/params.c",
]
PROD_AVX_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx.c",
]
PROD_AVX512SKX_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx512skx.c",
]
PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neondotfp16-aarch64.c",
]
PROD_FP16ARITH_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/fp16arith.c",
]
PROD_FMA_MICROKERNEL_SRCS = [
]
OPERATOR_SRCS = [
"XNNPACK/src/operator-delete.c",
"XNNPACK/src/operators/argmax-pooling-nhwc.c",
"XNNPACK/src/operators/average-pooling-nhwc.c",
"XNNPACK/src/operators/batch-matrix-multiply-nc.c",
"XNNPACK/src/operators/binary-elementwise-nd.c",
"XNNPACK/src/operators/channel-shuffle-nc.c",
"XNNPACK/src/operators/constant-pad-nd.c",
"XNNPACK/src/operators/convolution-nchw.c",
"XNNPACK/src/operators/convolution-nhwc.c",
"XNNPACK/src/operators/deconvolution-nhwc.c",
"XNNPACK/src/operators/dynamic-fully-connected-nc.c",
"XNNPACK/src/operators/fully-connected-nc.c",
"XNNPACK/src/operators/global-average-pooling-ncw.c",
"XNNPACK/src/operators/global-average-pooling-nwc.c",
"XNNPACK/src/operators/lut-elementwise-nc.c",
"XNNPACK/src/operators/max-pooling-nhwc.c",
"XNNPACK/src/operators/prelu-nc.c",
"XNNPACK/src/operators/reduce-nd.c",
"XNNPACK/src/operators/resize-bilinear-nchw.c",
"XNNPACK/src/operators/resize-bilinear-nhwc.c",
"XNNPACK/src/operators/rope-nthc.c",
"XNNPACK/src/operators/scaled-dot-product-attention-nhtc.c",
"XNNPACK/src/operators/slice-nd.c",
"XNNPACK/src/operators/softmax-nc.c",
"XNNPACK/src/operators/transpose-nd.c",
"XNNPACK/src/operators/unary-elementwise-nc.c",
"XNNPACK/src/operators/unpooling-nhwc.c",
]
PROD_NEONI8MM_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neoni8mm.c",
]
PROD_AVX512F_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx512f.c",
]
JIT_SRCS = [
]
PROD_F16C_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/f16c.c",
]
PROD_NEON_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neon.c",
]
PROD_SCALAR_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/scalar.c",
]
PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neondot-aarch64.c",
]
PROD_FMA3_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/fma3.c",
]
LOGGING_SRCS = [
"XNNPACK/src/enums/allocation-type.c",
"XNNPACK/src/enums/datatype-strings.c",
"XNNPACK/src/enums/microkernel-type.c",
"XNNPACK/src/enums/node-type.c",
@ -282,8 +474,27 @@ LOGGING_SRCS = [
"XNNPACK/src/log.c",
]
PROD_NEONI8MM_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neoni8mm.c",
PROD_NEONFMA_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neonfma.c",
]
PROD_AVX2_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx2.c",
]
PROD_AVX512VBMI_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx512vbmi.c",
]
PROD_RVV_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/rvv.c",
]
PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neondotfp16arith.c",
]
PROD_XOP_MICROKERNEL_SRCS = [
]
AARCH32_ASM_MICROKERNEL_SRCS = [
@ -376,134 +587,10 @@ AARCH32_ASM_MICROKERNEL_SRCS = [
"XNNPACK/src/u32-filterbank-accumulate/u32-filterbank-accumulate-asm-aarch32-neon-x2.S",
]
PROD_F16C_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/f16c.c",
]
PROD_XOP_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/xop.c",
]
PROD_RVV_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/rvv.c",
]
SUBGRAPH_SRCS = [
"XNNPACK/src/memory-planner.c",
"XNNPACK/src/runtime.c",
"XNNPACK/src/subgraph.c",
"XNNPACK/src/subgraph/abs.c",
"XNNPACK/src/subgraph/add2.c",
"XNNPACK/src/subgraph/argmax-pooling-2d.c",
"XNNPACK/src/subgraph/average-pooling-2d.c",
"XNNPACK/src/subgraph/bankers-rounding.c",
"XNNPACK/src/subgraph/batch-matrix-multiply.c",
"XNNPACK/src/subgraph/ceiling.c",
"XNNPACK/src/subgraph/clamp.c",
"XNNPACK/src/subgraph/concatenate.c",
"XNNPACK/src/subgraph/convert.c",
"XNNPACK/src/subgraph/convolution-2d.c",
"XNNPACK/src/subgraph/copy.c",
"XNNPACK/src/subgraph/deconvolution-2d.c",
"XNNPACK/src/subgraph/depth-to-space-2d.c",
"XNNPACK/src/subgraph/depthwise-convolution-2d.c",
"XNNPACK/src/subgraph/divide.c",
"XNNPACK/src/subgraph/elu.c",
"XNNPACK/src/subgraph/even-split.c",
"XNNPACK/src/subgraph/floor.c",
"XNNPACK/src/subgraph/fully-connected-sparse.c",
"XNNPACK/src/subgraph/fully-connected.c",
"XNNPACK/src/subgraph/global-average-pooling.c",
"XNNPACK/src/subgraph/global-sum-pooling.c",
"XNNPACK/src/subgraph/hardswish.c",
"XNNPACK/src/subgraph/leaky-relu.c",
"XNNPACK/src/subgraph/max-pooling-2d.c",
"XNNPACK/src/subgraph/maximum2.c",
"XNNPACK/src/subgraph/minimum2.c",
"XNNPACK/src/subgraph/multiply2.c",
"XNNPACK/src/subgraph/negate.c",
"XNNPACK/src/subgraph/prelu.c",
"XNNPACK/src/subgraph/reshape-helpers.c",
"XNNPACK/src/subgraph/scaled-dot-product-attention.c",
"XNNPACK/src/subgraph/sigmoid.c",
"XNNPACK/src/subgraph/softmax.c",
"XNNPACK/src/subgraph/space-to-depth-2d.c",
"XNNPACK/src/subgraph/square-root.c",
"XNNPACK/src/subgraph/square.c",
"XNNPACK/src/subgraph/squared-difference.c",
"XNNPACK/src/subgraph/static-constant-pad.c",
"XNNPACK/src/subgraph/static-mean.c",
"XNNPACK/src/subgraph/static-reshape.c",
"XNNPACK/src/subgraph/static-resize-bilinear-2d.c",
"XNNPACK/src/subgraph/static-slice.c",
"XNNPACK/src/subgraph/static-transpose.c",
"XNNPACK/src/subgraph/subtract.c",
"XNNPACK/src/subgraph/tanh.c",
"XNNPACK/src/subgraph/unpooling-2d.c",
"XNNPACK/src/subgraph/validation.c",
"XNNPACK/src/tensor.c",
]
PROD_FMA3_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/fma3.c",
]
PROD_AVX512SKX_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx512skx.c",
]
JIT_SRCS = [
"XNNPACK/src/jit/aarch32-assembler.cc",
"XNNPACK/src/jit/aarch64-assembler.cc",
"XNNPACK/src/jit/assembler.cc",
]
PROD_NEONFP16_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neonfp16.c",
]
PROD_SSSE3_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/ssse3.c",
]
XNNPACK_SRCS = [
"XNNPACK/src/configs/argmaxpool-config.c",
"XNNPACK/src/configs/avgpool-config.c",
"XNNPACK/src/configs/binary-elementwise-config.c",
"XNNPACK/src/configs/cmul-config.c",
"XNNPACK/src/configs/conv-hwc2chw-config.c",
"XNNPACK/src/configs/dwconv-config.c",
"XNNPACK/src/configs/dwconv2d-chw-config.c",
"XNNPACK/src/configs/experiments-config.c",
"XNNPACK/src/configs/gavgpool-config.c",
"XNNPACK/src/configs/gavgpool-cw-config.c",
"XNNPACK/src/configs/gemm-config.c",
"XNNPACK/src/configs/ibilinear-chw-config.c",
"XNNPACK/src/configs/ibilinear-config.c",
"XNNPACK/src/configs/lut32norm-config.c",
"XNNPACK/src/configs/maxpool-config.c",
"XNNPACK/src/configs/pavgpool-config.c",
"XNNPACK/src/configs/prelu-config.c",
"XNNPACK/src/configs/raddstoreexpminusmax-config.c",
"XNNPACK/src/configs/reduce-config.c",
"XNNPACK/src/configs/rmax-config.c",
"XNNPACK/src/configs/spmm-config.c",
"XNNPACK/src/configs/transpose-config.c",
"XNNPACK/src/configs/unary-elementwise-config.c",
"XNNPACK/src/configs/unpool-config.c",
"XNNPACK/src/configs/vmulcaddc-config.c",
"XNNPACK/src/configs/xx-fill-config.c",
"XNNPACK/src/configs/xx-pad-config.c",
"XNNPACK/src/configs/x8-lut-config.c",
"XNNPACK/src/configs/zip-config.c",
"XNNPACK/src/init.c",
"XNNPACK/src/params.c",
]
PROD_FP16ARITH_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/fp16arith.c",
]
TABLE_SRCS = [
"XNNPACK/src/tables/exp2-k-over-64.c",
"XNNPACK/src/tables/exp2-k-over-2048.c",
@ -516,105 +603,7 @@ TABLE_SRCS = [
"XNNPACK/src/tables/vlog.c",
]
PROD_NEON_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neon.c",
]
PROD_AVXVNNI_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avxvnni.c",
]
PROD_NEONFP16ARITH_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neonfp16arith.c",
]
PROD_SSE_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/sse.c",
]
PROD_NEON_AARCH64_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neon-aarch64.c",
"XNNPACK/src/amalgam/gen/neonfma-aarch64.c",
]
PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neondotfp16-aarch64.c",
]
PROD_NEONFMA_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neonfma.c",
]
PROD_FMA_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/fma.c",
]
PROD_SSE2_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/sse2.c",
]
PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx512vnnigfni.c",
]
PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neonfp16arith-aarch64.c",
]
PROD_AVX2_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx2.c",
]
OPERATOR_SRCS = [
"XNNPACK/src/operator-delete.c",
"XNNPACK/src/operators/argmax-pooling-nhwc.c",
"XNNPACK/src/operators/average-pooling-nhwc.c",
"XNNPACK/src/operators/batch-matrix-multiply-nc.c",
"XNNPACK/src/operators/binary-elementwise-nd.c",
"XNNPACK/src/operators/channel-shuffle-nc.c",
"XNNPACK/src/operators/constant-pad-nd.c",
"XNNPACK/src/operators/convolution-nchw.c",
"XNNPACK/src/operators/convolution-nhwc.c",
"XNNPACK/src/operators/deconvolution-nhwc.c",
"XNNPACK/src/operators/dynamic-fully-connected-nc.c",
"XNNPACK/src/operators/fully-connected-nc.c",
"XNNPACK/src/operators/global-average-pooling-ncw.c",
"XNNPACK/src/operators/global-average-pooling-nwc.c",
"XNNPACK/src/operators/lut-elementwise-nc.c",
"XNNPACK/src/operators/max-pooling-nhwc.c",
"XNNPACK/src/operators/prelu-nc.c",
"XNNPACK/src/operators/reduce-nd.c",
"XNNPACK/src/operators/resize-bilinear-nchw.c",
"XNNPACK/src/operators/resize-bilinear-nhwc.c",
"XNNPACK/src/operators/rope-nthc.c",
"XNNPACK/src/operators/scaled-dot-product-attention-nhtc.c",
"XNNPACK/src/operators/slice-nd.c",
"XNNPACK/src/operators/softmax-nc.c",
"XNNPACK/src/operators/transpose-nd.c",
"XNNPACK/src/operators/unary-elementwise-nc.c",
"XNNPACK/src/operators/unpooling-nhwc.c",
]
PROD_AVX512VBMI_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/avx512vbmi.c",
]
PROD_NEONDOT_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neondot.c",
]
PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neondot-aarch64.c",
]
PROD_SSE41_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/sse41.c",
]
PROD_ARMSIMD32_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/armsimd32.c",
]
PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS = [
"XNNPACK/src/amalgam/gen/neondotfp16arith.c",
]

View File

@ -7,7 +7,6 @@ PROD_SCALAR_MICROKERNEL_SRCS = [
]
PROD_FMA_MICROKERNEL_SRCS = [
"xnnpack_wrappers/amalgam/gen/fma.c",
]
PROD_ARMSIMD32_MICROKERNEL_SRCS = [
@ -92,7 +91,6 @@ PROD_F16C_MICROKERNEL_SRCS = [
]
PROD_XOP_MICROKERNEL_SRCS = [
"xnnpack_wrappers/amalgam/gen/xop.c",
]
PROD_FMA3_MICROKERNEL_SRCS = [
@ -447,28 +445,16 @@ AARCH64_ASM_MICROKERNEL_SRCS = [
"xnnpack_wrappers/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
"xnnpack_wrappers/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S",
"xnnpack_wrappers/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
]