Update XNNPACK Version (#139913)

Updating XNNPACK version to 4ea82e595b36106653175dcb04b2aa532660d0d8 (submodule update).

Pull Request resolved: https://github.com/pytorch/pytorch/pull/139913
Approved by: https://github.com/digantdesai, https://github.com/huydhn

parent e429a3b72e
commit cca34be584
@@ -14,7 +14,7 @@ mkdir -p ${ZIP_DIR}/src
 cp -R ${ARTIFACTS_DIR}/arm64/include ${ZIP_DIR}/install/
 # build a FAT binary
 cd ${ZIP_DIR}/install/lib
-target_libs=(libc10.a libclog.a libcpuinfo.a libeigen_blas.a libpthreadpool.a libpytorch_qnnpack.a libtorch_cpu.a libtorch.a libXNNPACK.a)
+target_libs=(libc10.a libclog.a libcpuinfo.a libeigen_blas.a libpthreadpool.a libpytorch_qnnpack.a libtorch_cpu.a libtorch.a libXNNPACK.a libmicrokernels-prod.a)
 for lib in ${target_libs[*]}
 do
     if [ -f "${ARTIFACTS_DIR}/x86_64/lib/${lib}" ] && [ -f "${ARTIFACTS_DIR}/arm64/lib/${lib}" ]; then
@@ -112,6 +112,7 @@ if(ANDROID_ABI)
   import_static_lib(libc10)
   import_static_lib(libnnpack)
   import_static_lib(libXNNPACK)
+  import_static_lib(libmicrokernels-prod)
   import_static_lib(libpytorch_qnnpack)
   import_static_lib(libpthreadpool)
   import_static_lib(libeigen_blas)
@@ -129,6 +130,7 @@ if(ANDROID_ABI)
   libc10
   libnnpack
   libXNNPACK
+  libmicrokernels-prod
   libpytorch_qnnpack
   libpthreadpool
   libeigen_blas
@@ -151,6 +153,7 @@ else()
 
   if(USE_XNNPACK)
     list(APPEND pytorch_jni_LIBS XNNPACK)
+    list(APPEND pytorch_jni_LIBS microkernels-prod)
   endif()
 
   if(USE_SYSTEM_PTHREADPOOL)
@@ -234,62 +234,27 @@ Tensor qnnpack_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
 
 #ifdef USE_XNNPACK
 C10_ALWAYS_INLINE
-enum xnn_status xnnp_create_add_nd(
-    int8_t azp,
-    float ascale,
-    int8_t bzp,
-    float bscale,
-    int8_t czp,
-    float cscale,
-    int8_t output_min,
-    int8_t output_max,
-    uint32_t flags,
-    xnn_operator_t* op) {
-  return xnn_create_add_nd_qs8(
-      azp, /* int8_t input1_zero_point */
-      ascale, /* float input1_scale */
-      bzp, /* int8_t input2_zero_point */
-      bscale, /* float input2_scale */
-      czp, /* int8_t output_zero_point */
-      cscale, /* float output_scale */
-      output_min, /* int8_t output_min */
-      output_max, /* int8_t output_max */
-      flags, /* uint32_t flags */
-      op); /* xnn_operator_t* add_op_out */
-}
-
-C10_ALWAYS_INLINE
-enum xnn_status xnnp_reshape_add_nd(
-    xnn_operator_t op,
-    const std::vector<size_t>& a_shape,
-    const std::vector<size_t>& b_shape,
-    pthreadpool_t pt_pool) {
-  return xnn_reshape_add_nd_qs8(
-      op, /* xnn_operator_t add_op */
-      a_shape.size(), /* size_t num_input1_dims */
-      a_shape.data(), /* const size_t* input1_shape */
-      b_shape.size(), /* size_t num_input2_dims */
-      b_shape.data(), /* const size_t* input2_shape */
-      pt_pool); /* pthreadpool_t threadpool */
-}
-
-C10_ALWAYS_INLINE
-enum xnn_status xnnp_setup_add_nd(
-    xnn_operator_t op,
-    const int8_t* da,
-    const int8_t* db,
-    int8_t* dc,
-    pthreadpool_t pt_pool) {
-  return xnn_setup_add_nd_qs8(
-      op, /* xnn_operator_t add_op */
-      da, /* const int8_t* input1 */
-      db, /* const int8_t* input2 */
-      dc); /* int8_t* output */
-}
+enum xnn_status xnnp_define_q_tensor(const Tensor& tensor, MemoryFormat format, uint32_t& id, xnn_subgraph_t subgraph_ptr, uint32_t external_id, uint32_t flags){
+  Tensor contig_tensor = tensor.contiguous(format);
+  const auto tensor_shape = xnnp_utils::get_mem_format_aware_shape(contig_tensor);
+  const int32_t zero_point = static_cast<int32_t>(contig_tensor.q_zero_point());
+  const float scale = static_cast<float>(contig_tensor.q_scale());
+
+  return xnn_define_quantized_tensor_value(
+      subgraph_ptr,
+      xnn_datatype_qint8,
+      zero_point,
+      scale,
+      tensor.ndimension(),
+      tensor_shape.data(),
+      nullptr,
+      external_id,
+      flags,
+      &id);
+}
 
 template <typename scalar_t, bool ReLUFused = false>
 Tensor xnnp_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
-  using underlying_t = typename scalar_t::underlying;
   const string func_name = "xnnp_add()";
   TORCH_CHECK(qa.ndimension() > 0, func_name, ": Got empty input tensor.");
   TORCH_CHECK(at::native::xnnpack::available(), func_name, ": XNNPACK is not available")
@@ -299,12 +264,6 @@ Tensor xnnp_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
   auto qa_mem_format = qa.suggest_memory_format();
   Tensor qa_contig = qa.contiguous(qa_mem_format);
   Tensor qb_contig = qb.contiguous(qa_mem_format);
-
-  const auto a_zero_point = qa_contig.q_zero_point();
-  const auto b_zero_point = qb_contig.q_zero_point();
-  const auto a_scale = qa_contig.q_scale();
-  const auto b_scale = qb_contig.q_scale();
-
   Tensor qy = at::native::empty_affine_quantized(
       at::infer_size_dimvector(qa_contig.sizes(), qb_contig.sizes()),
       qa.scalar_type(),
@@ -319,72 +278,108 @@ Tensor xnnp_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
     return qy;
   }
 
-  xnn_operator_t xnnp_op = nullptr;
-  xnnpack_operator xnnp_add_operator;
-
-  auto output_max = std::numeric_limits<underlying_t>::max();
-  auto output_min = std::numeric_limits<underlying_t>::min();
+  auto output_max = std::numeric_limits<float>::infinity();
+  auto output_min = -std::numeric_limits<float>::infinity();
   if (ReLUFused) {
-    /*
-     * FIXME: use activationLimits<T>()
-     * With <T>, MSVC runs into "error C3862: identifier activationLimits not found".
-     */
-    constexpr int64_t qmin = std::numeric_limits<underlying_t>::min();
-    constexpr int64_t qmax = std::numeric_limits<underlying_t>::max();
-    int64_t qvalue = static_cast<int64_t>(zero_point);
-    qvalue = std::max<int64_t>(qvalue, qmin);
-    output_min = static_cast<underlying_t>(std::min<int64_t>(qvalue, qmax));
+    output_min = 0;
   }
 
-  // Create an operator
-  auto status = xnnp_create_add_nd(
-      a_zero_point,
-      a_scale,
-      b_zero_point,
-      b_scale,
-      static_cast<underlying_t>(zero_point),
-      static_cast<float>(scale),
-      output_min,
-      output_max,
-      0,
-      &xnnp_op);
-  xnnp_add_operator = xnnpack_operator(xnnp_op);
+  // Create XNNPACK Subgraph
+  xnn_subgraph_t subgraph_ptr = nullptr;
+  auto status = xnn_create_subgraph(
+    /*external_value_ids=*/3,
+    /*flags=*/0,
+    &subgraph_ptr);
+  TORCH_CHECK(
+      status == xnn_status_success,
+      func_name, ": xnn create subgraph failed(", status,")!");
+  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> subgraph(
+      subgraph_ptr, &xnn_delete_subgraph);
+
+  uint32_t input0_id = XNN_INVALID_VALUE_ID, input1_id = XNN_INVALID_VALUE_ID, output_id = XNN_INVALID_VALUE_ID;
+
+  // Defining the quantized input 0
+  status = xnnp_define_q_tensor(
+    qa,
+    qa_mem_format,
+    input0_id,
+    subgraph_ptr,
+    0,
+    XNN_VALUE_FLAG_EXTERNAL_INPUT
+  );
+  TORCH_CHECK(
+      status == xnn_status_success && input0_id != XNN_INVALID_VALUE_ID,
+      func_name, ": xnn define input 0 failed(", status,")!");
+
+  // Defining the quantized input 1
+  status = xnnp_define_q_tensor(
+    qb,
+    qa_mem_format,
+    input1_id,
+    subgraph_ptr,
+    1,
+    XNN_VALUE_FLAG_EXTERNAL_INPUT
+  );
+  TORCH_CHECK(
+      status == xnn_status_success && input1_id != XNN_INVALID_VALUE_ID,
+      func_name, ": xnn define input 1 failed(", status,")!");
+
+  // Defining the quantized output
+  status = xnnp_define_q_tensor(
+    qy,
+    qa_mem_format,
+    output_id,
+    subgraph_ptr,
+    2,
+    XNN_VALUE_FLAG_EXTERNAL_OUTPUT
+  );
+  TORCH_CHECK(
+      status == xnn_status_success && output_id != XNN_INVALID_VALUE_ID,
+      func_name, ": xnn define output failed(", status,")!");
+
+  const struct xnn_binary_params binary_params = {output_min, output_max};
+  status = xnn_define_binary(
+    subgraph_ptr,
+    xnn_binary_add,
+    &binary_params,
+    input0_id,
+    input1_id,
+    output_id,
+    0);
   TORCH_CHECK(
       status == xnn_status_success,
-      func_name, ": xnn create operator failed(", status,")!");
+      func_name, ": xnn define binary add failed(", status,")!");
 
-  const auto qa_shape = xnnp_utils::get_mem_format_aware_shape(qa_contig);
-  const auto qb_shape = xnnp_utils::get_mem_format_aware_shape(qb_contig);
-
-  // Reshape the operator
-  status = xnnp_reshape_add_nd(
-      xnnp_add_operator.get(),
-      qa_shape,
-      qb_shape,
-      caffe2::pthreadpool_());
-
+  // create runtime
+  xnn_runtime_t runtime_ptr = nullptr;
+  status = xnn_create_runtime_v2(subgraph_ptr, caffe2::pthreadpool_(), 0, &runtime_ptr);
   TORCH_CHECK(
       status == xnn_status_success,
-      func_name, ": xnn reshape operator failed(", status,")!");
+      func_name, ": xnn create runtime failed(", status,")!");
+  TORCH_CHECK(
+      runtime_ptr != nullptr,
+      func_name, ": xnn create runtime failed because runtime_ptr is null");
+  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(
+      runtime_ptr, &xnn_delete_runtime);
 
-  // Setup the operator
-  status = xnnp_setup_add_nd(
-      xnnp_add_operator.get(),
-      reinterpret_cast<const underlying_t*>(qa_contig.data_ptr<scalar_t>()),
-      reinterpret_cast<const underlying_t*>(qb_contig.data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(qy.data_ptr<scalar_t>()),
-      caffe2::pthreadpool_());
+  std::array<xnn_external_value, 3> external = {
+    xnn_external_value{input0_id, reinterpret_cast<void*>(qa_contig.data_ptr<scalar_t>())},
+    xnn_external_value{input1_id, reinterpret_cast<void*>(qb_contig.data_ptr<scalar_t>())},
+    xnn_external_value{output_id, reinterpret_cast<void*>(qy.data_ptr<scalar_t>())}};
+
+  status = xnn_setup_runtime(
+    runtime_ptr,
+    external.size(),
+    external.data());
   TORCH_CHECK(
       status == xnn_status_success,
-      func_name, ": xnn setup operator failed(", status,")!");
-
-  // Run the operator
-  status = xnn_run_operator(
-      xnnp_add_operator.get(), /* xnn_operator_t op */
-      caffe2::pthreadpool_()); /* pthreadpool_t threadpool */
+      func_name, ": xnn setup runtime failed(", status,")!");
+  status = xnn_invoke_runtime(runtime_ptr);
   TORCH_CHECK(
       status == xnn_status_success,
-      func_name, ": xnn run operator failed(", status,")");
+      func_name, ": xnn invoke runtime failed(", status,")!");
 
   return qy;
 }
 #endif // USE_XNNPACK
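For orientation (this note and sketch are not part of the diff): the hunk above replaces XNNPACK's per-operator create/reshape/setup/run calls with the subgraph API, in which external tensor values are defined on a subgraph, a node is added between them, and the subgraph is compiled into a runtime that is bound to external buffers and invoked. A minimal, self-contained sketch of that flow for a plain fp32 elementwise add, assuming the public <xnnpack.h> header and a single-threaded (null) threadpool, with most status checks elided for brevity:

// Hypothetical standalone sketch of the XNNPACK subgraph flow (fp32 add).
// The PyTorch code above follows the same shape but defines qint8 values
// via xnnp_define_q_tensor and passes caffe2::pthreadpool_() as threadpool.
#include <xnnpack.h>
#include <array>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

int main() {
  if (xnn_initialize(/*allocator=*/nullptr) != xnn_status_success) return 1;

  const std::vector<size_t> shape = {4};  // one-dimensional example
  std::vector<float> a = {1, 2, 3, 4}, b = {10, 20, 30, 40}, out(4);

  // 1. Create a subgraph with three external values (two inputs, one output).
  xnn_subgraph_t subgraph = nullptr;
  xnn_create_subgraph(/*external_value_ids=*/3, /*flags=*/0, &subgraph);

  // 2. Define the external tensor values.
  uint32_t a_id = XNN_INVALID_VALUE_ID, b_id = XNN_INVALID_VALUE_ID, out_id = XNN_INVALID_VALUE_ID;
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, shape.size(), shape.data(),
                          nullptr, 0, XNN_VALUE_FLAG_EXTERNAL_INPUT, &a_id);
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, shape.size(), shape.data(),
                          nullptr, 1, XNN_VALUE_FLAG_EXTERNAL_INPUT, &b_id);
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, shape.size(), shape.data(),
                          nullptr, 2, XNN_VALUE_FLAG_EXTERNAL_OUTPUT, &out_id);

  // 3. Define the binary add node with an unclamped output range.
  struct xnn_binary_params params = {-std::numeric_limits<float>::infinity(),
                                     std::numeric_limits<float>::infinity()};
  xnn_define_binary(subgraph, xnn_binary_add, &params, a_id, b_id, out_id, /*flags=*/0);

  // 4. Compile the subgraph into a runtime, bind the external buffers, invoke.
  xnn_runtime_t runtime = nullptr;
  xnn_create_runtime_v2(subgraph, /*threadpool=*/nullptr, /*flags=*/0, &runtime);
  std::array<xnn_external_value, 3> externals = {
      xnn_external_value{a_id, a.data()},
      xnn_external_value{b_id, b.data()},
      xnn_external_value{out_id, out.data()}};
  xnn_setup_runtime(runtime, externals.size(), externals.data());
  xnn_invoke_runtime(runtime);

  for (float v : out) std::printf("%g ", v);  // expected: 11 22 33 44
  std::printf("\n");

  xnn_delete_runtime(runtime);
  xnn_delete_subgraph(subgraph);
  return 0;
}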
@@ -13,7 +13,6 @@
 #include <ATen/native/quantized/cpu/init_qnnpack.h>
 #include <ATen/quantized/Quantizer.h>
 #include <caffe2/utils/threadpool/pthreadpool-cpp.h>
-#include <torch/library.h>
 
 #ifndef AT_PER_OPERATOR_HEADERS
 #include <ATen/Functions.h>
@@ -56,14 +55,32 @@ Tensor _mul_out(Tensor& out, const Tensor& self, const Tensor& other) {
 }
 
 #ifdef USE_XNNPACK
+C10_ALWAYS_INLINE
+enum xnn_status xnnp_define_q_tensor(const Tensor& tensor, MemoryFormat format, uint32_t& id, xnn_subgraph_t subgraph_ptr, uint32_t external_id, uint32_t flags){
+  Tensor contig_tensor = tensor.contiguous(format);
+  const auto tensor_shape = xnnp_utils::get_mem_format_aware_shape(contig_tensor);
+  const int32_t zero_point = static_cast<int32_t>(contig_tensor.q_zero_point());
+  const float scale = static_cast<float>(contig_tensor.q_scale());
+
+  return xnn_define_quantized_tensor_value(
+      subgraph_ptr,
+      xnn_datatype_qint8,
+      zero_point,
+      scale,
+      tensor.ndimension(),
+      tensor_shape.data(),
+      nullptr,
+      external_id,
+      flags,
+      &id);
+}
+
 template <typename scalar_t, bool ReLUFused = false>
 Tensor _mul_out_xnnpack(
     const Tensor& self,
     const Tensor& other,
     double output_scale,
     int64_t output_zero_point) {
-  using underlying_t = typename scalar_t::underlying;
 
   const string func_name = "xnnp_mul()";
   TORCH_CHECK(self.ndimension() > 0, func_name, ": Got empty input tensor.");
   TORCH_CHECK(
@@ -89,96 +106,108 @@ Tensor _mul_out_xnnpack(
     return out;
   }
 
-  int64_t self_zero_point = self_contig.q_zero_point();
-  double self_scale = self_contig.q_scale();
-  int64_t other_zero_point = other_contig.q_zero_point();
-  double other_scale = other_contig.q_scale();
-
-  int64_t output_min = std::numeric_limits<underlying_t>::min();
-  int64_t output_max = std::numeric_limits<underlying_t>::max();
-
-  if(ReLUFused) {
-    /*
-     * FIXME: use activationLimits<T>()
-     * With <T>, MSVC runs into "error C3862: identifier activationLimits not
-     * found".
-     */
-    constexpr int64_t qmin = std::numeric_limits<underlying_t>::min();
-    constexpr int64_t qmax = std::numeric_limits<underlying_t>::max();
-    int64_t qvalue = static_cast<int64_t>(output_zero_point);
-    qvalue = std::max<int64_t>(qvalue, qmin);
-    output_min = static_cast<underlying_t>(std::min<int64_t>(qvalue, qmax));
+  auto output_max = std::numeric_limits<float>::infinity();
+  auto output_min = -std::numeric_limits<float>::infinity();
+  if (ReLUFused) {
+    output_min = 0;
   }
 
-  xnn_operator_t xnnp_op = nullptr;
-  xnnpack_operator xnnp_qmul_operator;
-
-  // create xnnpack multiply operator ...
-  auto status = xnn_create_multiply_nd_qs8(
-      self_zero_point,
-      self_scale,
-      other_zero_point,
-      other_scale,
-      static_cast<underlying_t>(output_zero_point),
-      static_cast<float>(output_scale),
-      output_min,
-      output_max,
-      0,
-      &xnnp_op);
-
-  TORCH_CHECK(
-      status == xnn_status_success,
-      func_name,
-      ": xnn create operator failed(",
-      status,
-      ")!");
-  xnnp_qmul_operator = xnnpack_operator(xnnp_op);
-
-  const auto self_shape = xnnp_utils::get_mem_format_aware_shape(self_contig);
-  const auto other_shape = xnnp_utils::get_mem_format_aware_shape(other_contig);
-
-  // reshape operator
-  status = xnn_reshape_multiply_nd_qs8(
-      xnnp_qmul_operator.get(),
-      self_shape.size(),
-      self_shape.data(),
-      other_shape.size(),
-      other_shape.data(),
-      caffe2::pthreadpool_());
-
-  TORCH_CHECK(
-      status == xnn_status_success,
-      func_name,
-      ": xnn reshape operator failed(",
-      status,
-      ")!");
-
-  // set up operator
-  status = xnn_setup_multiply_nd_qs8(
-      xnnp_qmul_operator.get(),
-      reinterpret_cast<const underlying_t*>(self_contig.data_ptr<scalar_t>()),
-      reinterpret_cast<const underlying_t*>(other_contig.data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(out.data_ptr<scalar_t>())
-  );
-
-  TORCH_CHECK(
-      status == xnn_status_success,
-      func_name,
-      ": xnn setup operator failed(",
-      status,
-      ")!");
-
-  // Run the operator
-  status = xnn_run_operator(
-      xnnp_qmul_operator.get(), /* xnn_operator_t op */
-      caffe2::pthreadpool_()); /* pthreadpool_t threadpool */
-  TORCH_CHECK(
-      status == xnn_status_success,
-      func_name,
-      ": xnn run operator failed(",
-      status,
-      ")");
+  // Create XNNPACK Subgraph
+  xnn_subgraph_t subgraph_ptr = nullptr;
+  auto status = xnn_create_subgraph(
+    /*external_value_ids=*/3,
+    /*flags=*/0,
+    &subgraph_ptr);
+  TORCH_CHECK(
+      status == xnn_status_success,
+      func_name, ": xnn create subgraph failed(", status,")!");
+  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> subgraph(
+      subgraph_ptr, &xnn_delete_subgraph);
+
+  uint32_t input0_id = XNN_INVALID_VALUE_ID;
+  uint32_t input1_id = XNN_INVALID_VALUE_ID;
+  uint32_t output_id = XNN_INVALID_VALUE_ID;
+
+  // Defining the quantized input 0
+  status = xnnp_define_q_tensor(
+    self,
+    qa_mem_format,
+    input0_id,
+    subgraph_ptr,
+    0,
+    XNN_VALUE_FLAG_EXTERNAL_INPUT
+  );
+  TORCH_CHECK(
+      status == xnn_status_success && input0_id != XNN_INVALID_VALUE_ID,
+      func_name, ": xnn define input 0 failed(", status,")!");
+
+  // Defining the quantized input 1
+  status = xnnp_define_q_tensor(
+    other,
+    qa_mem_format,
+    input1_id,
+    subgraph_ptr,
+    1,
+    XNN_VALUE_FLAG_EXTERNAL_INPUT
+  );
+  TORCH_CHECK(
+      status == xnn_status_success && input1_id != XNN_INVALID_VALUE_ID,
+      func_name, ": xnn define input 1 failed(", status,")!");
+
+  // Defining the quantized output
+  status = xnnp_define_q_tensor(
+    out,
+    qa_mem_format,
+    output_id,
+    subgraph_ptr,
+    2,
+    XNN_VALUE_FLAG_EXTERNAL_OUTPUT
+  );
+  TORCH_CHECK(
+      status == xnn_status_success && output_id != XNN_INVALID_VALUE_ID,
+      func_name, ": xnn define output failed(", status,")!");
+
+  const struct xnn_binary_params binary_params = {output_min, output_max};
+  status = xnn_define_binary(
+    subgraph_ptr,
+    xnn_binary_multiply,
+    &binary_params,
+    input0_id,
+    input1_id,
+    output_id,
+    0);
+  TORCH_CHECK(
+      status == xnn_status_success,
+      func_name, ": xnn define binary add failed(", status,")!");
+
+  // create runtime
+  xnn_runtime_t runtime_ptr = nullptr;
+  status = xnn_create_runtime_v2(subgraph_ptr, caffe2::pthreadpool_(), 0, &runtime_ptr);
+  TORCH_CHECK(
+      status == xnn_status_success,
+      func_name, ": xnn create runtime failed(", status,")!");
+  TORCH_CHECK(
+      runtime_ptr != nullptr,
+      func_name, ": xnn create runtime failed because runtime_ptr is null");
+  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(
+      runtime_ptr, &xnn_delete_runtime);
+
+  std::array<xnn_external_value, 3> external = {
+    xnn_external_value{input0_id, reinterpret_cast<void*>(self.data_ptr<scalar_t>())},
+    xnn_external_value{input1_id, reinterpret_cast<void*>(other.data_ptr<scalar_t>())},
+    xnn_external_value{output_id, reinterpret_cast<void*>(out.data_ptr<scalar_t>())}};
+
+  status = xnn_setup_runtime(
+    runtime_ptr,
+    external.size(),
+    external.data());
+  TORCH_CHECK(
+      status == xnn_status_success,
+      func_name, ": xnn setup runtime failed(", status,")!");
+  status = xnn_invoke_runtime(runtime_ptr);
+  TORCH_CHECK(
+      status == xnn_status_success,
+      func_name, ": xnn invoke runtime failed(", status,")!");
 
   return out;
 }
@@ -19,46 +19,84 @@ bool use_hardswish(
 
 static Tensor& hardswish_impl(Tensor& input, Tensor& output) {
   using namespace internal;
+  // Create XNNPACK Subgraph
+  xnn_subgraph_t subgraph_ptr = nullptr;
+  xnn_status status = xnn_create_subgraph(
+    /*external_value_ids=*/2,
+    /*flags=*/0,
+    &subgraph_ptr);
+  TORCH_CHECK(
+    status == xnn_status_success,
+    "xnn create subgraph failed(", status,")!");
+  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> subgraph(
+    subgraph_ptr, &xnn_delete_subgraph);
+  uint32_t input_id = XNN_INVALID_VALUE_ID, output_id = XNN_INVALID_VALUE_ID;
+  std::vector<size_t> input_output_shape(input.sizes().begin(), input.sizes().end());
 
-  xnn_operator_t hardswish_op{};
-  const xnn_status create_status = xnn_create_hardswish_nc_f32(
-    0, // flags
-    &hardswish_op);
+  status = xnn_define_tensor_value(
+    subgraph_ptr,
+    xnn_datatype_fp32,
+    input_output_shape.size(),
+    input_output_shape.data(),
+    nullptr,
+    0,
+    XNN_VALUE_FLAG_EXTERNAL_INPUT,
+    &input_id
+  );
   TORCH_CHECK(
-    xnn_status_success == create_status,
-    "xnn_create_hardswish_nc_f32 failed!");
-
-  Operator hardswish_scoped_op(hardswish_op);
-
-  const xnn_status reshape_status = xnn_reshape_hardswish_nc_f32(
-    hardswish_op,
-    input.numel(), // Batch
-    1, // channels
-    1, // input stride
-    1, // output stride
-    caffe2::pthreadpool_()); // threadpool
+    status == xnn_status_success,
+    "defining xnn input failed(", status,")!");
 
+  status = xnn_define_tensor_value(
+    subgraph_ptr,
+    xnn_datatype_fp32,
+    input_output_shape.size(),
+    input_output_shape.data(),
+    nullptr,
+    1,
+    XNN_VALUE_FLAG_EXTERNAL_OUTPUT,
+    &output_id
+  );
   TORCH_CHECK(
-    xnn_status_success == reshape_status,
-    "xnn_reshape_hardswish_nc_f32 failed!");
+    status == xnn_status_success,
+    "defining xnn output failed(", status,")!");
 
-  const xnn_status setup_status = xnn_setup_hardswish_nc_f32(
-    hardswish_op,
-    input.data_ptr<float>(),
-    output.data_ptr<float>());
+  status = xnn_define_unary(
+    subgraph_ptr,
+    xnn_unary_hardswish,
+    nullptr,
+    input_id,
+    output_id,
+    0
+  );
 
-  TORCH_CHECK(
-    xnn_status_success == setup_status,
-    "xnn_setup_hardswish_nc_f32 failed!");
-
-  const xnn_status run_status = xnn_run_operator(
-    hardswish_op,
-    caffe2::pthreadpool_()); // threadpool
-
-  TORCH_INTERNAL_ASSERT(
-    xnn_status_success == run_status,
-    "xnn_run_operator failed!");
+  // create runtime
+  xnn_runtime_t runtime_ptr = nullptr;
+  status = xnn_create_runtime_v2(subgraph_ptr, caffe2::pthreadpool_(), 0, &runtime_ptr);
+  TORCH_CHECK(
+    status == xnn_status_success,
+    "xnn create runtime failed(", status,")!");
+  TORCH_CHECK(
+    runtime_ptr != nullptr,
+    "xnn create runtime failed because runtime_ptr is null");
+  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(
+    runtime_ptr, &xnn_delete_runtime);
+
+  std::array<xnn_external_value, 2> external = {
+    xnn_external_value{input_id, input.data_ptr<float>()},
+    xnn_external_value{output_id, output.data_ptr<float>()}};
+
+  status = xnn_setup_runtime(
+    runtime_ptr,
+    external.size(),
+    external.data());
+  TORCH_CHECK(
+    status == xnn_status_success,
+    "xnn setup runtime failed(", status,")!");
+  status = xnn_invoke_runtime(runtime_ptr);
+  TORCH_CHECK(
+    status == xnn_status_success,
+    "xnn invoke runtime failed(", status,")!");
 
   return output;
 }
@@ -7,6 +7,27 @@
 
 namespace at::native::xnnpack {
 
+inline std::vector<size_t> get_mem_format_aware_shape(const at::Tensor& in) {
+  const auto mem_format = in.suggest_memory_format();
+  const auto& sizes = in.sizes();
+  std::vector<size_t> ret(sizes.begin(), sizes.end());
+  if (mem_format == c10::MemoryFormat::ChannelsLast) {
+    // NCHW -> NHWC
+    // 0123 -> 0231
+    ret[1] = sizes[2]; /* H */
+    ret[2] = sizes[3]; /* W */
+    ret[3] = sizes[1]; /* C */
+  } else if (mem_format == c10::MemoryFormat::ChannelsLast3d) {
+    // NCDHW -> NDHWC
+    // 01234 -> 02341
+    ret[1] = sizes[2]; /* D */
+    ret[2] = sizes[3]; /* H */
+    ret[3] = sizes[4]; /* W */
+    ret[4] = sizes[1]; /* C */
+  }
+  return ret;
+}
+
 bool use_global_average_pool(const Tensor& input) {
   return xnnpack::available() && (1 <= input.ndimension()) &&
       (input.device().is_cpu()) && (kFloat == input.scalar_type()) &&
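Aside (not part of the diff): the helper added above only permutes the size list into the layout XNNPACK expects for channels-last tensors. A standalone analogue, written just for illustration and keyed off tensor rank rather than the suggested memory format, shows the NCHW to NHWC (and NCDHW to NDHWC) reordering it produces:

// Hypothetical standalone analogue of the size permutation (not the ATen code).
#include <cstdio>
#include <vector>

std::vector<size_t> nchw_to_nhwc(const std::vector<size_t>& sizes) {
  std::vector<size_t> ret(sizes);
  if (sizes.size() == 4) {         // NCHW -> NHWC
    ret[1] = sizes[2];             // H
    ret[2] = sizes[3];             // W
    ret[3] = sizes[1];             // C
  } else if (sizes.size() == 5) {  // NCDHW -> NDHWC
    ret[1] = sizes[2];             // D
    ret[2] = sizes[3];             // H
    ret[3] = sizes[4];             // W
    ret[4] = sizes[1];             // C
  }
  return ret;
}

int main() {
  for (size_t d : nchw_to_nhwc({2, 3, 4, 5})) std::printf("%zu ", d);  // prints: 2 4 5 3
  std::printf("\n");
  return 0;
}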
@@ -31,63 +52,91 @@ Tensor global_average_pool(const Tensor& input) {
       MemoryFormat::ChannelsLast,
       input_padded_contig_nhwc.opt_names());
 
-  xnn_operator_t global_average_pooling_op{};
-  const xnn_status create_status = xnn_create_global_average_pooling_nwc_f32(
-      -std::numeric_limits<float>::infinity(),
-      std::numeric_limits<float>::infinity(),
-      0 /* flags */,
-      &global_average_pooling_op);
-
+  // Create XNNPACK Subgraph
+  xnn_subgraph_t subgraph_ptr = nullptr;
+  xnn_status status = xnn_create_subgraph(
+      /*external_value_ids=*/2,
+      /*flags=*/0,
+      &subgraph_ptr);
+
   TORCH_CHECK(
-      xnn_status_success == create_status,
-      "xnn_create_global_average_pooling_nwc_f32 failed!");
-
-  Operator global_avg_pool_scoped_op(global_average_pooling_op);
-
-  size_t workspace_size = 0;
-  size_t workspace_alignment = 0;
-
-  const xnn_status reshape_status = xnn_reshape_global_average_pooling_nwc_f32(
-      global_average_pooling_op,
-      input_padded_contig_nhwc.size(Layout::Activation4D::batch), // batch_size
-      input_padded_contig_nhwc.size(Layout::Activation4D::width) *
-          input_padded_contig_nhwc.size(Layout::Activation4D::height), // width
-      input_padded_contig_nhwc.size(Layout::Activation4D::channels), // channels
-      input_padded_contig_nhwc.size(
-          Layout::Activation4D::channels), // input stride
-      input_padded_contig_nhwc.size(
-          Layout::Activation4D::channels), // output stride
-      &workspace_size, // workspace_size
-      &workspace_alignment, // workspace_alignment
-      caffe2::pthreadpool_());
-
+      status == xnn_status_success,
+      "xnn create subgraph failed(", status,")!");
+  std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> subgraph(
+      subgraph_ptr, &xnn_delete_subgraph);
+  uint32_t input_id = XNN_INVALID_VALUE_ID, output_id = XNN_INVALID_VALUE_ID;
+
+  const auto& input_shape = get_mem_format_aware_shape(input_padded_contig_nhwc);
+  status = xnn_define_tensor_value(
+      subgraph_ptr,
+      xnn_datatype_fp32,
+      input_shape.size(),
+      input_shape.data(),
+      nullptr,
+      0,
+      XNN_VALUE_FLAG_EXTERNAL_INPUT,
+      &input_id
+  );
   TORCH_CHECK(
-      xnn_status_success == reshape_status,
-      "xnn_reshape_global_average_pooling_nwc_f32 failed!");
-
-  // Create Workspace pointer, which we will align and pad with 16 bytes
-  size_t xnnpack_buffer_padding = 16;
-  std::vector<char> workspace_vector(workspace_size + workspace_alignment + xnnpack_buffer_padding);
-  void* maybe_aligned_workspace = workspace_vector.data();
-  void* aligned_workspace =
-    (void*)((intptr_t)maybe_aligned_workspace + workspace_alignment - (intptr_t)maybe_aligned_workspace % workspace_alignment);
-
-  const xnn_status setup_status = xnn_setup_global_average_pooling_nwc_f32(
-      global_average_pooling_op,
-      aligned_workspace,
-      input_padded_contig_nhwc.data_ptr<float>(),
-      output.data_ptr<float>());
-
+      status == xnn_status_success,
+      "defining xnn input failed(", status,")!");
+
+  const auto& output_shape = get_mem_format_aware_shape(output);
+  status = xnn_define_tensor_value(
+      subgraph_ptr,
+      xnn_datatype_fp32,
+      output_shape.size(),
+      output_shape.data(),
+      nullptr,
+      1,
+      XNN_VALUE_FLAG_EXTERNAL_OUTPUT,
+      &output_id
+  );
   TORCH_CHECK(
-      xnn_status_success == setup_status,
-      "xnn_setup_global_average_pooling_nwc_f32 failed!");
-
-  const xnn_status run_status =
-      xnn_run_operator(global_average_pooling_op, caffe2::pthreadpool_());
-
+      status == xnn_status_success,
+      "defining xnn output failed(", status,")!");
+
+  std::vector<size_t> reduce_dims{1, 2};
+  status = xnn_define_static_reduce(
+      subgraph_ptr,
+      xnn_reduce_mean,
+      reduce_dims.size(),
+      reduce_dims.data(),
+      input_id,
+      output_id,
+      0
+  );
   TORCH_CHECK(
-      xnn_status_success == run_status,
-      "xnn_setup_global_average_pooling_nwc_f32 failed!");
+      status == xnn_status_success,
+      "defining xnn static reduce failed(", status,")!");
+
+  // create runtime
+  xnn_runtime_t runtime_ptr = nullptr;
+  status = xnn_create_runtime_v2(subgraph_ptr, caffe2::pthreadpool_(), 0, &runtime_ptr);
+  TORCH_CHECK(
+      status == xnn_status_success,
+      "xnn create runtime failed(", status,")!");
+  TORCH_CHECK(
+      runtime_ptr != nullptr,
+      "xnn create runtime failed because runtime_ptr is null");
+  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> auto_runtime(
+      runtime_ptr, &xnn_delete_runtime);
+
+  std::array<xnn_external_value, 2> external = {
+    xnn_external_value{input_id, input_padded_contig_nhwc.data_ptr<float>()},
+    xnn_external_value{output_id, output.data_ptr<float>()}};
+
+  status = xnn_setup_runtime(
+      runtime_ptr,
+      external.size(),
+      external.data());
+  TORCH_CHECK(
+      status == xnn_status_success,
+      "xnn setup runtime failed(", status,")!");
+  status = xnn_invoke_runtime(runtime_ptr);
+  TORCH_CHECK(
+      status == xnn_status_success,
+      "xnn invoke runtime failed(", status,")!");
 
   return output.to(input.suggest_memory_format());
 }
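Aside (not part of the diff): the reduce-based path above relies on the fact that a mean reduction over dims {1, 2} of an NHWC tensor is exactly a global average pool. A small reference computation, using a hypothetical global_avg_pool_nhwc helper written only for illustration:

// Hypothetical reference (not the ATen code): for NHWC input,
// out[n][c] = mean over h, w of in[n][h][w][c], which is what
// xnn_reduce_mean over dims {1, 2} computes.
#include <cstdio>
#include <vector>

std::vector<float> global_avg_pool_nhwc(const std::vector<float>& in,
                                        size_t N, size_t H, size_t W, size_t C) {
  std::vector<float> out(N * C, 0.f);
  for (size_t n = 0; n < N; ++n)
    for (size_t h = 0; h < H; ++h)
      for (size_t w = 0; w < W; ++w)
        for (size_t c = 0; c < C; ++c)
          out[n * C + c] += in[((n * H + h) * W + w) * C + c];
  for (float& v : out) v /= static_cast<float>(H * W);
  return out;
}

int main() {
  // One image, 2x2 spatial, 1 channel: mean of {1, 2, 3, 4} is 2.5.
  auto out = global_avg_pool_nhwc({1, 2, 3, 4}, 1, 2, 2, 1);
  std::printf("%g\n", out[0]);  // prints: 2.5
  return 0;
}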
@@ -516,6 +516,9 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
   # Disable I8MM For CI since clang 9 does not support neon i8mm.
   set(XNNPACK_ENABLE_ARM_I8MM OFF CACHE BOOL "")
 
+  # Disable avxvnni int8
+  set(XNNPACK_ENABLE_AVXVNNIINT8 OFF CACHE BOOL "")
+
   # Older MSVC versions don't support AVX512FP. TODO Minimum version support?
   IF(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
     set(XNNPACK_ENABLE_AVX512FP16 OFF CACHE BOOL "")
@@ -94,6 +94,7 @@ else()
 
   if(@USE_XNNPACK@)
     append_torchlib_if_found(XNNPACK)
+    append_torchlib_if_found(microkernels-prod)
   endif()
 
   append_torchlib_if_found(caffe2_protos protobuf-lite protobuf protoc)
@@ -111,7 +111,7 @@ else
 end
 
 puts "Linking static libraries..."
-libs = ['libc10.a', 'libclog.a', 'libpthreadpool.a', 'libXNNPACK.a', 'libeigen_blas.a', 'libcpuinfo.a', 'libpytorch_qnnpack.a', 'libtorch_cpu.a', 'libtorch.a']
+libs = ['libc10.a', 'libclog.a', 'libpthreadpool.a', 'libXNNPACK.a', 'libmicrokernels-prod.a', 'libeigen_blas.a', 'libcpuinfo.a', 'libpytorch_qnnpack.a', 'libtorch_cpu.a', 'libtorch.a']
 frameworks = ['CoreML', 'Metal', 'MetalPerformanceShaders', 'Accelerate', 'UIKit']
 targets.each do |target|
   # NB: All these libraries and frameworks have already been linked by TestApp, adding them
@@ -40,7 +40,7 @@ end
 
   # link static libraries
   target.frameworks_build_phases.clear
-  libs = ['libc10.a', 'libclog.a', 'libpthreadpool.a', 'libXNNPACK.a', 'libeigen_blas.a', 'libcpuinfo.a', 'libpytorch_qnnpack.a', 'libtorch_cpu.a', 'libtorch.a', 'libkineto.a']
+  libs = ['libc10.a', 'libclog.a', 'libpthreadpool.a', 'libXNNPACK.a', 'libmicrokernels-prod.a', 'libeigen_blas.a', 'libcpuinfo.a', 'libpytorch_qnnpack.a', 'libtorch_cpu.a', 'libtorch.a', 'libkineto.a']
  for lib in libs do
    path = "#{install_path}/lib/#{lib}"
    if File.exist?(path)
third_party/XNNPACK (vendored submodule, 2 lines changed)
@@ -1 +1 @@
-Subproject commit 87ee0b46b834f67bad9025d4a82ed5654f3403d3
+Subproject commit 4ea82e595b36106653175dcb04b2aa532660d0d8
third_party/xnnpack.buck.bzl (vendored, 820 lines changed): file diff suppressed because it is too large.
third_party/xnnpack_buck_shim.bzl (vendored, new file, 33 lines)
@@ -0,0 +1,33 @@
+load(
+    "//xplat/third-party/XNNPACK/XNNPACK:build_srcs.bzl",
+    _LOGGING_SRCS = "LOGGING_SRCS",
+    _OPERATOR_SRCS = "OPERATOR_SRCS",
+    _SUBGRAPH_SRCS = "SUBGRAPH_SRCS",
+    _TABLE_SRCS = "TABLE_SRCS",
+    _XNNPACK_SRCS = "XNNPACK_SRCS",
+)
+load("//xplat/third-party/XNNPACK/XNNPACK/gen:microkernels.bzl", "prod_srcs_for_arch")
+load("//tools/build_defs:glob_defs.bzl", "subdir_glob")
+
+def define_xnnpack_build_src(xnnpack_build_src):
+    return ["XNNPACK/{}".format(src) for src in xnnpack_build_src]
+
+def prod_srcs_for_arch_wrapper(arch):
+    prod_srcs = prod_srcs_for_arch(arch)
+    return define_xnnpack_build_src(prod_srcs)
+
+def get_xnnpack_headers():
+    src_headers = subdir_glob([
+        ("XNNPACK/src", "**/*.h"),
+    ])
+    include_headers = subdir_glob([
+        ("XNNPACK/include", "*.h"),
+    ])
+
+    return src_headers | include_headers
+
+OPERATOR_SRCS = define_xnnpack_build_src(_OPERATOR_SRCS)
+SUBGRAPH_SRCS = define_xnnpack_build_src(_SUBGRAPH_SRCS)
+TABLE_SRCS = define_xnnpack_build_src(_TABLE_SRCS)
+XNNPACK_SRCS = define_xnnpack_build_src(_XNNPACK_SRCS)
+LOGGING_SRCS = define_xnnpack_build_src(_LOGGING_SRCS)
third_party/xnnpack_src_defs.bzl (vendored, 1929 lines changed): file diff suppressed because it is too large.
third_party/xnnpack_wrapper_defs.bzl (vendored, 1596 lines changed): file diff suppressed because it is too large.