mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Update PyTorch for XNNPACK 87ee0b4 (#134518)
Summary: Update XNNPACK library version. Test Plan: Combined diff CI is clean: D61586079 (all changes, has to be split out for export). Differential Revision: D61822610 Pull Request resolved: https://github.com/pytorch/pytorch/pull/134518 Approved by: https://github.com/mcr229
This commit is contained in:
parent
042b733ddd
commit
3b40b07efb
|
|
@ -544,6 +544,11 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
|
|||
# Disable I8MM for CI since Clang 9 does not support NEON I8MM.
|
||||
set(XNNPACK_ENABLE_ARM_I8MM OFF CACHE BOOL "")
|
||||
|
||||
# Older MSVC versions don't support AVX512FP16. TODO: determine the minimum MSVC version that does.
|
||||
IF(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
|
||||
set(XNNPACK_ENABLE_AVX512FP16 OFF CACHE BOOL "")
|
||||
ENDIF()
|
||||
|
||||
# Conditionally disable AVX512AMX, as it requires Clang 11 or later. Note that
|
||||
# XNNPACK does conditionally compile this based on GCC version. Once it also does
|
||||
# so based on Clang version, this logic can be removed.
|
||||
|
|
|
|||
2
third_party/XNNPACK
vendored
2
third_party/XNNPACK
vendored
|
|
@ -1 +1 @@
|
|||
Subproject commit fcbf55af6cf28a4627bcd1f703ab7ad843f0f3a2
|
||||
Subproject commit 87ee0b46b834f67bad9025d4a82ed5654f3403d3
|
||||
17
third_party/generate-xnnpack-wrappers.py
vendored
17
third_party/generate-xnnpack-wrappers.py
vendored
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import print_function
|
||||
from pathlib import Path
|
||||
import collections
|
||||
import os
|
||||
import sys
|
||||
|
|
@ -99,6 +100,7 @@ def handle_singleline_parse(line):
|
|||
return key_val[0], [x[4:] for x in key_val[1:]]
|
||||
|
||||
def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"):
|
||||
print(f"Updating sources from {cmakefile}")
|
||||
sources = collections.defaultdict(list)
|
||||
with open(os.path.join(xnnpack_path, cmakefile)) as cmake:
|
||||
lines = cmake.readlines()
|
||||
|
|
@ -106,6 +108,17 @@ def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"):
|
|||
while i < len(lines):
|
||||
line = lines[i]
|
||||
|
||||
if lines[i].startswith("INCLUDE"):
|
||||
file, _ = handle_singleline_parse(line)
|
||||
if file.startswith("cmake/gen/"):
|
||||
path = Path(xnnpack_path) / "XNNPACK" / file
|
||||
local_sources = update_sources(xnnpack_path, path.absolute().as_posix())
|
||||
for k,v in local_sources.items():
|
||||
if k in sources:
|
||||
sources[k] = sources[k] + local_sources[k]
|
||||
else:
|
||||
sources[k] = local_sources[k]
|
||||
|
||||
if lines[i].startswith("SET") and "src/" in lines[i]:
|
||||
name, val = handle_singleline_parse(line)
|
||||
sources[name].extend(val)
|
||||
|
|
@ -132,7 +145,7 @@ def gen_wrappers(xnnpack_path):
|
|||
xnnpack_sources = collections.defaultdict(list)
|
||||
sources = update_sources(xnnpack_path)
|
||||
|
||||
microkernels_sources = update_sources(xnnpack_path, "XNNPACK/cmake/microkernels.cmake")
|
||||
microkernels_sources = update_sources(xnnpack_path, "XNNPACK/cmake/gen/microkernels.cmake")
|
||||
for key in microkernels_sources:
|
||||
sources[key] = microkernels_sources[key]
|
||||
|
||||
|
|
@ -186,6 +199,8 @@ def gen_wrappers(xnnpack_path):
|
|||
|
||||
|
||||
def main(argv):
|
||||
print("Generating wrappers...")
|
||||
|
||||
if argv is None or len(argv) == 0:
|
||||
gen_wrappers(".")
|
||||
else:
|
||||
|
|
|
|||
56
third_party/xnnpack.buck.bzl
vendored
56
third_party/xnnpack.buck.bzl
vendored
|
|
@ -4,7 +4,6 @@ load("//tools/build_defs:glob_defs.bzl", "subdir_glob")
|
|||
load("//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "APPLETVOS", "CXX", "IOS", "MACOSX", "WINDOWS")
|
||||
load(
|
||||
":xnnpack_src_defs.bzl",
|
||||
"JIT_SRCS",
|
||||
"LOGGING_SRCS",
|
||||
"OPERATOR_SRCS",
|
||||
"SUBGRAPH_SRCS",
|
||||
|
|
@ -108,7 +107,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
preferred_linkage = "static",
|
||||
preprocessor_flags = [
|
||||
"-DXNN_LOG_LEVEL=0",
|
||||
"-DXNN_ENABLE_JIT=0",
|
||||
"-DXNN_ENABLE_SPARSE=0",
|
||||
"-DXNN_ENABLE_MEMOPT",
|
||||
],
|
||||
|
|
@ -154,37 +152,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
],
|
||||
)
|
||||
|
||||
fb_xplat_cxx_library(
|
||||
name = "jit_memory",
|
||||
# srcs have to include HOT_SRCS to be able to build on ARVR
|
||||
srcs = JIT_SRCS,
|
||||
headers = subdir_glob([
|
||||
("XNNPACK/src", "**/*.h"),
|
||||
]),
|
||||
header_namespace = "",
|
||||
apple_sdks = (IOS, MACOSX, APPLETVOS),
|
||||
compiler_flags = [
|
||||
"-Oz",
|
||||
],
|
||||
fbobjc_preprocessor_flags = [
|
||||
"-DXNN_PRIVATE=",
|
||||
"-DXNN_INTERNAL=",
|
||||
],
|
||||
labels = labels,
|
||||
platforms = (APPLE, ANDROID, CXX, WINDOWS),
|
||||
preferred_linkage = "static",
|
||||
preprocessor_flags = [
|
||||
"-DXNN_LOG_LEVEL=0",
|
||||
],
|
||||
visibility = ["PUBLIC"],
|
||||
windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS,
|
||||
windows_compiler_flags_override = WINDOWS_FLAGS,
|
||||
deps = [
|
||||
":interface",
|
||||
third_party("clog"),
|
||||
],
|
||||
)
|
||||
|
||||
fb_xplat_cxx_library(
|
||||
name = "ukernels_scalar",
|
||||
srcs = PROD_SCALAR_MICROKERNEL_SRCS,
|
||||
|
|
@ -792,6 +759,9 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
preprocessor_flags = [
|
||||
"-DXNN_LOG_LEVEL=0",
|
||||
],
|
||||
exported_preprocessor_flags = [
|
||||
"-DXNN_ENABLE_AVX512VNNI"
|
||||
],
|
||||
visibility = ["PUBLIC"],
|
||||
windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
|
||||
windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
|
||||
|
|
@ -833,6 +803,9 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
preprocessor_flags = [
|
||||
"-DXNN_LOG_LEVEL=0",
|
||||
],
|
||||
exported_preprocessor_flags = [
|
||||
"-DXNN_ENABLE_AVX512VNNI"
|
||||
],
|
||||
visibility = ["PUBLIC"],
|
||||
windows_clang_compiler_flags_override = WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS + ["-mavx"],
|
||||
windows_compiler_flags_override = WINDOWS_FLAGS + ["-mavx"],
|
||||
|
|
@ -1328,6 +1301,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
"-mf16c",
|
||||
],
|
||||
windows_compiler_flags_override = WINDOWS_FLAGS + [
|
||||
"/D__AVX2__",
|
||||
"-mavx2",
|
||||
"-mfma",
|
||||
"-mf16c",
|
||||
|
|
@ -1576,6 +1550,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
"-mavx512bw",
|
||||
"-mavx512dq",
|
||||
"-mavx512vl",
|
||||
|
||||
],
|
||||
deps = [
|
||||
":interface",
|
||||
|
|
@ -1633,6 +1608,7 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
"-mavx512bw",
|
||||
"-mavx512dq",
|
||||
"-mavx512vl",
|
||||
"/D__AVX512BW__",
|
||||
],
|
||||
windows_srcs = PROD_AVX512SKX_MICROKERNEL_SRCS,
|
||||
deps = [
|
||||
|
|
@ -2463,7 +2439,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
windows_compiler_flags_override = WINDOWS_FLAGS,
|
||||
deps = [
|
||||
":interface",
|
||||
":jit_memory",
|
||||
third_party("FP16"),
|
||||
],
|
||||
)
|
||||
|
|
@ -2507,7 +2482,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
windows_compiler_flags_override = WINDOWS_FLAGS,
|
||||
deps = [
|
||||
":interface",
|
||||
":jit_memory",
|
||||
third_party("FP16"),
|
||||
],
|
||||
)
|
||||
|
|
@ -2519,7 +2493,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
preferred_linkage = "static",
|
||||
visibility = ["PUBLIC"],
|
||||
deps = [
|
||||
":jit_memory",
|
||||
":ukernels_asm_aarch64",
|
||||
":ukernels_neon",
|
||||
":ukernels_neon_aarch64",
|
||||
|
|
@ -2581,10 +2554,13 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
":ukernels_ssse3_ovr_win32",
|
||||
":ukernels_xop_ovr_win32",
|
||||
":ukernels_avx512vbmi",
|
||||
":ukernels_avx512vnni_ovr_win32",
|
||||
":ukernels_avx512vnnigfni_ovr_win32",
|
||||
# ":ukernels_avx512vnni_ovr_win32", # Build crashes on Windows Clang 17.0.3, re-enable when fixed (T199959765)
|
||||
# ":ukernels_avx512vnnigfni_ovr_win32",
|
||||
# ":ukernels_avxvnni_ovr_win32" Excluding avxvnni microkernels because they fail on older compilers
|
||||
],
|
||||
exported_preprocessor_flags = [
|
||||
"-DXNN_ENABLE_AVX512VNNIGFNI=0"
|
||||
]
|
||||
)
|
||||
|
||||
fb_xplat_cxx_library(
|
||||
|
|
@ -2594,7 +2570,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
preferred_linkage = "static",
|
||||
visibility = ["PUBLIC"],
|
||||
deps = [
|
||||
":jit_memory",
|
||||
":ukernels_armsimd32",
|
||||
":ukernels_asm_aarch32",
|
||||
":ukernels_asm_aarch64",
|
||||
|
|
@ -2622,7 +2597,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
preferred_linkage = "static",
|
||||
visibility = ["PUBLIC"],
|
||||
deps = [
|
||||
":jit_memory",
|
||||
":ukernels_asm_aarch32",
|
||||
":ukernels_neon",
|
||||
":ukernels_neon_dot",
|
||||
|
|
@ -2690,7 +2664,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
"-DXNN_NO_X8_OPERATORS",
|
||||
"-DXNN_ENABLE_MEMOPT",
|
||||
"-DXNN_ENABLE_SPARSE=0",
|
||||
"-DXNN_ENABLE_JIT=0",
|
||||
"-DXNN_ENABLE_ASSEMBLY",
|
||||
"-DXNN_ENABLE_GEMM_M_SPECIALIZATION",
|
||||
"-DXNN_ENABLE_ARM_DOTPROD",
|
||||
|
|
@ -2712,7 +2685,6 @@ def define_xnnpack(third_party, labels = [], XNNPACK_WINDOWS_AVX512F_ENABLED = F
|
|||
"XNNPACK/src/memory.c",
|
||||
"XNNPACK/src/mutex.c",
|
||||
"XNNPACK/src/microparams-init.c",
|
||||
"XNNPACK/src/operators/post-operation.c",
|
||||
],
|
||||
visibility = ["PUBLIC"],
|
||||
windows_clang_compiler_flags_override = (WINDOWS_FLAGS + WINDOWS_CLANG_COMPILER_FLAGS) if XNNPACK_WINDOWS_AVX512F_ENABLED else WINDOWS_FLAGS,
|
||||
|
|
|
|||
477
third_party/xnnpack_src_defs.bzl
vendored
477
third_party/xnnpack_src_defs.bzl
vendored
|
|
@ -2,16 +2,12 @@
|
|||
Auto-generated by generate-xnnpack-wrappers.py script. Do not modify.
|
||||
"""
|
||||
|
||||
PROD_SCALAR_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/scalar.c",
|
||||
PROD_ARMSIMD32_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/armsimd32.c",
|
||||
]
|
||||
|
||||
PROD_AVX512VNNI_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx512vnni.c",
|
||||
]
|
||||
|
||||
PROD_AVX512F_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx512f.c",
|
||||
PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neonfp16arith-aarch64.c",
|
||||
]
|
||||
|
||||
AARCH64_ASM_MICROKERNEL_SRCS = [
|
||||
|
|
@ -240,41 +236,237 @@ AARCH64_ASM_MICROKERNEL_SRCS = [
|
|||
"XNNPACK/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
|
||||
"XNNPACK/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S",
|
||||
"XNNPACK/src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
|
||||
"XNNPACK/src/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
|
||||
"XNNPACK/src/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
|
||||
]
|
||||
|
||||
PROD_AVXVNNI_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avxvnni.c",
|
||||
]
|
||||
|
||||
SUBGRAPH_SRCS = [
|
||||
"XNNPACK/src/memory-planner.c",
|
||||
"XNNPACK/src/runtime.c",
|
||||
"XNNPACK/src/subgraph.c",
|
||||
"XNNPACK/src/subgraph/abs.c",
|
||||
"XNNPACK/src/subgraph/add2.c",
|
||||
"XNNPACK/src/subgraph/argmax-pooling-2d.c",
|
||||
"XNNPACK/src/subgraph/average-pooling-2d.c",
|
||||
"XNNPACK/src/subgraph/bankers-rounding.c",
|
||||
"XNNPACK/src/subgraph/batch-matrix-multiply.c",
|
||||
"XNNPACK/src/subgraph/ceiling.c",
|
||||
"XNNPACK/src/subgraph/clamp.c",
|
||||
"XNNPACK/src/subgraph/concatenate.c",
|
||||
"XNNPACK/src/subgraph/convert.c",
|
||||
"XNNPACK/src/subgraph/convolution-2d.c",
|
||||
"XNNPACK/src/subgraph/copy.c",
|
||||
"XNNPACK/src/subgraph/copysign.c",
|
||||
"XNNPACK/src/subgraph/deconvolution-2d.c",
|
||||
"XNNPACK/src/subgraph/depth-to-space-2d.c",
|
||||
"XNNPACK/src/subgraph/depthwise-convolution-2d.c",
|
||||
"XNNPACK/src/subgraph/divide.c",
|
||||
"XNNPACK/src/subgraph/elu.c",
|
||||
"XNNPACK/src/subgraph/even-split.c",
|
||||
"XNNPACK/src/subgraph/exp.c",
|
||||
"XNNPACK/src/subgraph/floor.c",
|
||||
"XNNPACK/src/subgraph/fully-connected-sparse.c",
|
||||
"XNNPACK/src/subgraph/fully-connected.c",
|
||||
"XNNPACK/src/subgraph/gelu.c",
|
||||
"XNNPACK/src/subgraph/global-average-pooling.c",
|
||||
"XNNPACK/src/subgraph/global-sum-pooling.c",
|
||||
"XNNPACK/src/subgraph/hardswish.c",
|
||||
"XNNPACK/src/subgraph/leaky-relu.c",
|
||||
"XNNPACK/src/subgraph/log.c",
|
||||
"XNNPACK/src/subgraph/max-pooling-2d.c",
|
||||
"XNNPACK/src/subgraph/maximum2.c",
|
||||
"XNNPACK/src/subgraph/minimum2.c",
|
||||
"XNNPACK/src/subgraph/multiply2.c",
|
||||
"XNNPACK/src/subgraph/negate.c",
|
||||
"XNNPACK/src/subgraph/prelu.c",
|
||||
"XNNPACK/src/subgraph/reciprocal-square-root.c",
|
||||
"XNNPACK/src/subgraph/reshape-helpers.c",
|
||||
"XNNPACK/src/subgraph/scaled-dot-product-attention.c",
|
||||
"XNNPACK/src/subgraph/sigmoid.c",
|
||||
"XNNPACK/src/subgraph/softmax.c",
|
||||
"XNNPACK/src/subgraph/space-to-depth-2d.c",
|
||||
"XNNPACK/src/subgraph/square-root.c",
|
||||
"XNNPACK/src/subgraph/square.c",
|
||||
"XNNPACK/src/subgraph/squared-difference.c",
|
||||
"XNNPACK/src/subgraph/static-constant-pad.c",
|
||||
"XNNPACK/src/subgraph/static-mean.c",
|
||||
"XNNPACK/src/subgraph/static-reshape.c",
|
||||
"XNNPACK/src/subgraph/static-resize-bilinear-2d.c",
|
||||
"XNNPACK/src/subgraph/static-slice.c",
|
||||
"XNNPACK/src/subgraph/static-transpose.c",
|
||||
"XNNPACK/src/subgraph/subtract.c",
|
||||
"XNNPACK/src/subgraph/tanh.c",
|
||||
"XNNPACK/src/subgraph/unpooling-2d.c",
|
||||
"XNNPACK/src/subgraph/validation.c",
|
||||
"XNNPACK/src/tensor.c",
|
||||
]
|
||||
|
||||
PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx512vnnigfni.c",
|
||||
]
|
||||
|
||||
PROD_AVX512VNNI_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx512vnni.c",
|
||||
]
|
||||
|
||||
PROD_SSE2_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/sse2.c",
|
||||
]
|
||||
|
||||
PROD_NEONDOT_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neondot.c",
|
||||
]
|
||||
|
||||
PROD_SSE41_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/sse41.c",
|
||||
]
|
||||
|
||||
PROD_SSE_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/sse.c",
|
||||
]
|
||||
|
||||
PROD_NEONFP16ARITH_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neonfp16arith.c",
|
||||
]
|
||||
|
||||
PROD_NEONV8_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neonv8.c",
|
||||
]
|
||||
|
||||
PROD_NEONFP16_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neonfp16.c",
|
||||
]
|
||||
|
||||
XNNPACK_SRCS = [
|
||||
"XNNPACK/src/configs/argmaxpool-config.c",
|
||||
"XNNPACK/src/configs/avgpool-config.c",
|
||||
"XNNPACK/src/configs/binary-elementwise-config.c",
|
||||
"XNNPACK/src/configs/cmul-config.c",
|
||||
"XNNPACK/src/configs/conv-hwc2chw-config.c",
|
||||
"XNNPACK/src/configs/dwconv-config.c",
|
||||
"XNNPACK/src/configs/dwconv2d-chw-config.c",
|
||||
"XNNPACK/src/configs/experiments-config.c",
|
||||
"XNNPACK/src/configs/gavgpool-config.c",
|
||||
"XNNPACK/src/configs/gavgpool-cw-config.c",
|
||||
"XNNPACK/src/configs/gemm-config.c",
|
||||
"XNNPACK/src/configs/ibilinear-chw-config.c",
|
||||
"XNNPACK/src/configs/ibilinear-config.c",
|
||||
"XNNPACK/src/configs/lut32norm-config.c",
|
||||
"XNNPACK/src/configs/maxpool-config.c",
|
||||
"XNNPACK/src/configs/pavgpool-config.c",
|
||||
"XNNPACK/src/configs/prelu-config.c",
|
||||
"XNNPACK/src/configs/raddstoreexpminusmax-config.c",
|
||||
"XNNPACK/src/configs/reduce-config.c",
|
||||
"XNNPACK/src/configs/rmax-config.c",
|
||||
"XNNPACK/src/configs/spmm-config.c",
|
||||
"XNNPACK/src/configs/transpose-config.c",
|
||||
"XNNPACK/src/configs/unary-elementwise-config.c",
|
||||
"XNNPACK/src/configs/unpool-config.c",
|
||||
"XNNPACK/src/configs/vmulcaddc-config.c",
|
||||
"XNNPACK/src/configs/xx-fill-config.c",
|
||||
"XNNPACK/src/configs/xx-pad-config.c",
|
||||
"XNNPACK/src/configs/x8-lut-config.c",
|
||||
"XNNPACK/src/configs/zip-config.c",
|
||||
"XNNPACK/src/init.c",
|
||||
"XNNPACK/src/params.c",
|
||||
]
|
||||
|
||||
PROD_AVX_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx.c",
|
||||
]
|
||||
|
||||
PROD_AVX512SKX_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx512skx.c",
|
||||
]
|
||||
|
||||
PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neondotfp16-aarch64.c",
|
||||
]
|
||||
|
||||
PROD_FP16ARITH_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/fp16arith.c",
|
||||
]
|
||||
|
||||
PROD_FMA_MICROKERNEL_SRCS = [
|
||||
]
|
||||
|
||||
OPERATOR_SRCS = [
|
||||
"XNNPACK/src/operator-delete.c",
|
||||
"XNNPACK/src/operators/argmax-pooling-nhwc.c",
|
||||
"XNNPACK/src/operators/average-pooling-nhwc.c",
|
||||
"XNNPACK/src/operators/batch-matrix-multiply-nc.c",
|
||||
"XNNPACK/src/operators/binary-elementwise-nd.c",
|
||||
"XNNPACK/src/operators/channel-shuffle-nc.c",
|
||||
"XNNPACK/src/operators/constant-pad-nd.c",
|
||||
"XNNPACK/src/operators/convolution-nchw.c",
|
||||
"XNNPACK/src/operators/convolution-nhwc.c",
|
||||
"XNNPACK/src/operators/deconvolution-nhwc.c",
|
||||
"XNNPACK/src/operators/dynamic-fully-connected-nc.c",
|
||||
"XNNPACK/src/operators/fully-connected-nc.c",
|
||||
"XNNPACK/src/operators/global-average-pooling-ncw.c",
|
||||
"XNNPACK/src/operators/global-average-pooling-nwc.c",
|
||||
"XNNPACK/src/operators/lut-elementwise-nc.c",
|
||||
"XNNPACK/src/operators/max-pooling-nhwc.c",
|
||||
"XNNPACK/src/operators/prelu-nc.c",
|
||||
"XNNPACK/src/operators/reduce-nd.c",
|
||||
"XNNPACK/src/operators/resize-bilinear-nchw.c",
|
||||
"XNNPACK/src/operators/resize-bilinear-nhwc.c",
|
||||
"XNNPACK/src/operators/rope-nthc.c",
|
||||
"XNNPACK/src/operators/scaled-dot-product-attention-nhtc.c",
|
||||
"XNNPACK/src/operators/slice-nd.c",
|
||||
"XNNPACK/src/operators/softmax-nc.c",
|
||||
"XNNPACK/src/operators/transpose-nd.c",
|
||||
"XNNPACK/src/operators/unary-elementwise-nc.c",
|
||||
"XNNPACK/src/operators/unpooling-nhwc.c",
|
||||
]
|
||||
|
||||
PROD_NEONI8MM_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neoni8mm.c",
|
||||
]
|
||||
|
||||
PROD_AVX512F_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx512f.c",
|
||||
]
|
||||
|
||||
JIT_SRCS = [
|
||||
]
|
||||
|
||||
PROD_F16C_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/f16c.c",
|
||||
]
|
||||
|
||||
PROD_NEON_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neon.c",
|
||||
]
|
||||
|
||||
PROD_SCALAR_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/scalar.c",
|
||||
]
|
||||
|
||||
PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neondot-aarch64.c",
|
||||
]
|
||||
|
||||
PROD_FMA3_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/fma3.c",
|
||||
]
|
||||
|
||||
LOGGING_SRCS = [
|
||||
"XNNPACK/src/enums/allocation-type.c",
|
||||
"XNNPACK/src/enums/datatype-strings.c",
|
||||
"XNNPACK/src/enums/microkernel-type.c",
|
||||
"XNNPACK/src/enums/node-type.c",
|
||||
|
|
@ -282,8 +474,27 @@ LOGGING_SRCS = [
|
|||
"XNNPACK/src/log.c",
|
||||
]
|
||||
|
||||
PROD_NEONI8MM_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neoni8mm.c",
|
||||
PROD_NEONFMA_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neonfma.c",
|
||||
]
|
||||
|
||||
PROD_AVX2_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx2.c",
|
||||
]
|
||||
|
||||
PROD_AVX512VBMI_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx512vbmi.c",
|
||||
]
|
||||
|
||||
PROD_RVV_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/rvv.c",
|
||||
]
|
||||
|
||||
PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neondotfp16arith.c",
|
||||
]
|
||||
|
||||
PROD_XOP_MICROKERNEL_SRCS = [
|
||||
]
|
||||
|
||||
AARCH32_ASM_MICROKERNEL_SRCS = [
|
||||
|
|
@ -376,134 +587,10 @@ AARCH32_ASM_MICROKERNEL_SRCS = [
|
|||
"XNNPACK/src/u32-filterbank-accumulate/u32-filterbank-accumulate-asm-aarch32-neon-x2.S",
|
||||
]
|
||||
|
||||
PROD_F16C_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/f16c.c",
|
||||
]
|
||||
|
||||
PROD_XOP_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/xop.c",
|
||||
]
|
||||
|
||||
PROD_RVV_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/rvv.c",
|
||||
]
|
||||
|
||||
SUBGRAPH_SRCS = [
|
||||
"XNNPACK/src/memory-planner.c",
|
||||
"XNNPACK/src/runtime.c",
|
||||
"XNNPACK/src/subgraph.c",
|
||||
"XNNPACK/src/subgraph/abs.c",
|
||||
"XNNPACK/src/subgraph/add2.c",
|
||||
"XNNPACK/src/subgraph/argmax-pooling-2d.c",
|
||||
"XNNPACK/src/subgraph/average-pooling-2d.c",
|
||||
"XNNPACK/src/subgraph/bankers-rounding.c",
|
||||
"XNNPACK/src/subgraph/batch-matrix-multiply.c",
|
||||
"XNNPACK/src/subgraph/ceiling.c",
|
||||
"XNNPACK/src/subgraph/clamp.c",
|
||||
"XNNPACK/src/subgraph/concatenate.c",
|
||||
"XNNPACK/src/subgraph/convert.c",
|
||||
"XNNPACK/src/subgraph/convolution-2d.c",
|
||||
"XNNPACK/src/subgraph/copy.c",
|
||||
"XNNPACK/src/subgraph/deconvolution-2d.c",
|
||||
"XNNPACK/src/subgraph/depth-to-space-2d.c",
|
||||
"XNNPACK/src/subgraph/depthwise-convolution-2d.c",
|
||||
"XNNPACK/src/subgraph/divide.c",
|
||||
"XNNPACK/src/subgraph/elu.c",
|
||||
"XNNPACK/src/subgraph/even-split.c",
|
||||
"XNNPACK/src/subgraph/floor.c",
|
||||
"XNNPACK/src/subgraph/fully-connected-sparse.c",
|
||||
"XNNPACK/src/subgraph/fully-connected.c",
|
||||
"XNNPACK/src/subgraph/global-average-pooling.c",
|
||||
"XNNPACK/src/subgraph/global-sum-pooling.c",
|
||||
"XNNPACK/src/subgraph/hardswish.c",
|
||||
"XNNPACK/src/subgraph/leaky-relu.c",
|
||||
"XNNPACK/src/subgraph/max-pooling-2d.c",
|
||||
"XNNPACK/src/subgraph/maximum2.c",
|
||||
"XNNPACK/src/subgraph/minimum2.c",
|
||||
"XNNPACK/src/subgraph/multiply2.c",
|
||||
"XNNPACK/src/subgraph/negate.c",
|
||||
"XNNPACK/src/subgraph/prelu.c",
|
||||
"XNNPACK/src/subgraph/reshape-helpers.c",
|
||||
"XNNPACK/src/subgraph/scaled-dot-product-attention.c",
|
||||
"XNNPACK/src/subgraph/sigmoid.c",
|
||||
"XNNPACK/src/subgraph/softmax.c",
|
||||
"XNNPACK/src/subgraph/space-to-depth-2d.c",
|
||||
"XNNPACK/src/subgraph/square-root.c",
|
||||
"XNNPACK/src/subgraph/square.c",
|
||||
"XNNPACK/src/subgraph/squared-difference.c",
|
||||
"XNNPACK/src/subgraph/static-constant-pad.c",
|
||||
"XNNPACK/src/subgraph/static-mean.c",
|
||||
"XNNPACK/src/subgraph/static-reshape.c",
|
||||
"XNNPACK/src/subgraph/static-resize-bilinear-2d.c",
|
||||
"XNNPACK/src/subgraph/static-slice.c",
|
||||
"XNNPACK/src/subgraph/static-transpose.c",
|
||||
"XNNPACK/src/subgraph/subtract.c",
|
||||
"XNNPACK/src/subgraph/tanh.c",
|
||||
"XNNPACK/src/subgraph/unpooling-2d.c",
|
||||
"XNNPACK/src/subgraph/validation.c",
|
||||
"XNNPACK/src/tensor.c",
|
||||
]
|
||||
|
||||
PROD_FMA3_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/fma3.c",
|
||||
]
|
||||
|
||||
PROD_AVX512SKX_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx512skx.c",
|
||||
]
|
||||
|
||||
JIT_SRCS = [
|
||||
"XNNPACK/src/jit/aarch32-assembler.cc",
|
||||
"XNNPACK/src/jit/aarch64-assembler.cc",
|
||||
"XNNPACK/src/jit/assembler.cc",
|
||||
]
|
||||
|
||||
PROD_NEONFP16_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neonfp16.c",
|
||||
]
|
||||
|
||||
PROD_SSSE3_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/ssse3.c",
|
||||
]
|
||||
|
||||
XNNPACK_SRCS = [
|
||||
"XNNPACK/src/configs/argmaxpool-config.c",
|
||||
"XNNPACK/src/configs/avgpool-config.c",
|
||||
"XNNPACK/src/configs/binary-elementwise-config.c",
|
||||
"XNNPACK/src/configs/cmul-config.c",
|
||||
"XNNPACK/src/configs/conv-hwc2chw-config.c",
|
||||
"XNNPACK/src/configs/dwconv-config.c",
|
||||
"XNNPACK/src/configs/dwconv2d-chw-config.c",
|
||||
"XNNPACK/src/configs/experiments-config.c",
|
||||
"XNNPACK/src/configs/gavgpool-config.c",
|
||||
"XNNPACK/src/configs/gavgpool-cw-config.c",
|
||||
"XNNPACK/src/configs/gemm-config.c",
|
||||
"XNNPACK/src/configs/ibilinear-chw-config.c",
|
||||
"XNNPACK/src/configs/ibilinear-config.c",
|
||||
"XNNPACK/src/configs/lut32norm-config.c",
|
||||
"XNNPACK/src/configs/maxpool-config.c",
|
||||
"XNNPACK/src/configs/pavgpool-config.c",
|
||||
"XNNPACK/src/configs/prelu-config.c",
|
||||
"XNNPACK/src/configs/raddstoreexpminusmax-config.c",
|
||||
"XNNPACK/src/configs/reduce-config.c",
|
||||
"XNNPACK/src/configs/rmax-config.c",
|
||||
"XNNPACK/src/configs/spmm-config.c",
|
||||
"XNNPACK/src/configs/transpose-config.c",
|
||||
"XNNPACK/src/configs/unary-elementwise-config.c",
|
||||
"XNNPACK/src/configs/unpool-config.c",
|
||||
"XNNPACK/src/configs/vmulcaddc-config.c",
|
||||
"XNNPACK/src/configs/xx-fill-config.c",
|
||||
"XNNPACK/src/configs/xx-pad-config.c",
|
||||
"XNNPACK/src/configs/x8-lut-config.c",
|
||||
"XNNPACK/src/configs/zip-config.c",
|
||||
"XNNPACK/src/init.c",
|
||||
"XNNPACK/src/params.c",
|
||||
]
|
||||
|
||||
PROD_FP16ARITH_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/fp16arith.c",
|
||||
]
|
||||
|
||||
TABLE_SRCS = [
|
||||
"XNNPACK/src/tables/exp2-k-over-64.c",
|
||||
"XNNPACK/src/tables/exp2-k-over-2048.c",
|
||||
|
|
@ -516,105 +603,7 @@ TABLE_SRCS = [
|
|||
"XNNPACK/src/tables/vlog.c",
|
||||
]
|
||||
|
||||
PROD_NEON_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neon.c",
|
||||
]
|
||||
|
||||
PROD_AVXVNNI_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avxvnni.c",
|
||||
]
|
||||
|
||||
PROD_NEONFP16ARITH_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neonfp16arith.c",
|
||||
]
|
||||
|
||||
PROD_SSE_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/sse.c",
|
||||
]
|
||||
|
||||
PROD_NEON_AARCH64_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neon-aarch64.c",
|
||||
"XNNPACK/src/amalgam/gen/neonfma-aarch64.c",
|
||||
]
|
||||
|
||||
PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neondotfp16-aarch64.c",
|
||||
]
|
||||
|
||||
PROD_NEONFMA_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neonfma.c",
|
||||
]
|
||||
|
||||
PROD_FMA_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/fma.c",
|
||||
]
|
||||
|
||||
PROD_SSE2_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/sse2.c",
|
||||
]
|
||||
|
||||
PROD_AVX512VNNIGFNI_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx512vnnigfni.c",
|
||||
]
|
||||
|
||||
PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neonfp16arith-aarch64.c",
|
||||
]
|
||||
|
||||
PROD_AVX2_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx2.c",
|
||||
]
|
||||
|
||||
OPERATOR_SRCS = [
|
||||
"XNNPACK/src/operator-delete.c",
|
||||
"XNNPACK/src/operators/argmax-pooling-nhwc.c",
|
||||
"XNNPACK/src/operators/average-pooling-nhwc.c",
|
||||
"XNNPACK/src/operators/batch-matrix-multiply-nc.c",
|
||||
"XNNPACK/src/operators/binary-elementwise-nd.c",
|
||||
"XNNPACK/src/operators/channel-shuffle-nc.c",
|
||||
"XNNPACK/src/operators/constant-pad-nd.c",
|
||||
"XNNPACK/src/operators/convolution-nchw.c",
|
||||
"XNNPACK/src/operators/convolution-nhwc.c",
|
||||
"XNNPACK/src/operators/deconvolution-nhwc.c",
|
||||
"XNNPACK/src/operators/dynamic-fully-connected-nc.c",
|
||||
"XNNPACK/src/operators/fully-connected-nc.c",
|
||||
"XNNPACK/src/operators/global-average-pooling-ncw.c",
|
||||
"XNNPACK/src/operators/global-average-pooling-nwc.c",
|
||||
"XNNPACK/src/operators/lut-elementwise-nc.c",
|
||||
"XNNPACK/src/operators/max-pooling-nhwc.c",
|
||||
"XNNPACK/src/operators/prelu-nc.c",
|
||||
"XNNPACK/src/operators/reduce-nd.c",
|
||||
"XNNPACK/src/operators/resize-bilinear-nchw.c",
|
||||
"XNNPACK/src/operators/resize-bilinear-nhwc.c",
|
||||
"XNNPACK/src/operators/rope-nthc.c",
|
||||
"XNNPACK/src/operators/scaled-dot-product-attention-nhtc.c",
|
||||
"XNNPACK/src/operators/slice-nd.c",
|
||||
"XNNPACK/src/operators/softmax-nc.c",
|
||||
"XNNPACK/src/operators/transpose-nd.c",
|
||||
"XNNPACK/src/operators/unary-elementwise-nc.c",
|
||||
"XNNPACK/src/operators/unpooling-nhwc.c",
|
||||
]
|
||||
|
||||
PROD_AVX512VBMI_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/avx512vbmi.c",
|
||||
]
|
||||
|
||||
PROD_NEONDOT_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neondot.c",
|
||||
]
|
||||
|
||||
PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neondot-aarch64.c",
|
||||
]
|
||||
|
||||
PROD_SSE41_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/sse41.c",
|
||||
]
|
||||
|
||||
PROD_ARMSIMD32_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/armsimd32.c",
|
||||
]
|
||||
|
||||
PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS = [
|
||||
"XNNPACK/src/amalgam/gen/neondotfp16arith.c",
|
||||
]
|
||||
|
|
|
|||
14
third_party/xnnpack_wrapper_defs.bzl
vendored
14
third_party/xnnpack_wrapper_defs.bzl
vendored
|
|
@ -7,7 +7,6 @@ PROD_SCALAR_MICROKERNEL_SRCS = [
|
|||
]
|
||||
|
||||
PROD_FMA_MICROKERNEL_SRCS = [
|
||||
"xnnpack_wrappers/amalgam/gen/fma.c",
|
||||
]
|
||||
|
||||
PROD_ARMSIMD32_MICROKERNEL_SRCS = [
|
||||
|
|
@ -92,7 +91,6 @@ PROD_F16C_MICROKERNEL_SRCS = [
|
|||
]
|
||||
|
||||
PROD_XOP_MICROKERNEL_SRCS = [
|
||||
"xnnpack_wrappers/amalgam/gen/xop.c",
|
||||
]
|
||||
|
||||
PROD_FMA3_MICROKERNEL_SRCS = [
|
||||
|
|
@ -447,28 +445,16 @@ AARCH64_ASM_MICROKERNEL_SRCS = [
|
|||
"xnnpack_wrappers/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
|
||||
"xnnpack_wrappers/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld64.S",
|
||||
"xnnpack_wrappers/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
|
||||
"xnnpack_wrappers/qu8-gemm/gen/qu8-gemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x8c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53-prfm.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a53.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75-prfm.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-cortex-a75.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64-prfm.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16-minmax-rndnu-asm-aarch64-neon-mlal-lane-ld64.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-cortex-a55.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-fp32-asm-aarch64-neondot-ld128.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-cortex-a55.S",
|
||||
"xnnpack_wrappers/qu8-igemm/gen/qu8-igemm-4x16c4-minmax-rndnu-asm-aarch64-neondot-ld128.S",
|
||||
]
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user