Enable threading for XNNPACK ops. (#34547)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/34547

This enables threading by passing a threadpool to XNNPACK ops.

Test Plan: python test/test_xnnpack_integration.py

Imported from OSS

Differential Revision: D20370553

fbshipit-source-id: 4db08e73f8c69b9e722b0e11a00621c4e229a31a
This commit is contained in:
parent 4da5569300
commit 84bd71dbd4
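At its core the change is mechanical: every XNNPACK setup/run call that previously received a nullptr threadpool now receives caffe2::xnnpack_threadpool(), as the hunks below show. A minimal sketch of what that final argument controls, assuming an already-created xnn_operator_t (operator creation is elided; only xnn_run_operator and its pthreadpool_t parameter are real API here):

#include <xnnpack.h>     // xnn_operator_t, xnn_run_operator
#include <pthreadpool.h> // pthreadpool_t

// Sketch: with a null pool XNNPACK runs the operator on the calling
// thread; with a real pthreadpool it parallelizes work across the pool.
xnn_status run_op(xnn_operator_t op, pthreadpool_t pool) {
  return xnn_run_operator(op, /*threadpool=*/pool);
}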
@@ -423,7 +423,7 @@ if(USE_PYTORCH_QNNPACK)
 endif()
 
 if(USE_XNNPACK)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_XNNPACK")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_XNNPACK -DUSE_INTERNAL_THREADPOOL_IMPL")
 endif()
 
 # ---[ Whitelist file if whitelist is specified
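The new -DUSE_INTERNAL_THREADPOOL_IMPL flag only defines a preprocessor symbol; it takes effect wherever code branches on it. A hypothetical illustration of how such a gate could be consumed (this branch is an assumption for illustration, not code from this commit):

#include <pthreadpool.h>
#include "caffe2/utils/threadpool/ThreadPoolXNNPACK.h"

pthreadpool_t select_pool() {
#ifdef USE_INTERNAL_THREADPOOL_IMPL
  // Assumed gate: use the new pthreadpool-compatible implementation.
  return caffe2::xnnpack_threadpool();
#else
  return nullptr; // run XNNPACK ops single-threaded
#endif
}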
@@ -5,6 +5,7 @@
 #ifdef USE_XNNPACK
 
 #include <xnnpack.h>
+#include "caffe2/utils/threadpool/ThreadPoolXNNPACK.h"
 
 namespace at {
 namespace native {
@@ -110,15 +110,15 @@ Tensor run(
       padded_input_nhwc.size(Layout::Activation4D::width), // input_width
       padded_input_nhwc.data_ptr<float>(),                 // input
       output.data_ptr<float>(),                            // output
-      nullptr);                                            // threadpool
+      caffe2::xnnpack_threadpool());                       // threadpool
 
   TORCH_CHECK(
       xnn_status_success == setup_status,
       "xnn_setup_convolution2d_nhwc_f32 failed!");
 
   const xnn_status run_status = xnn_run_operator(
-      context.op.get(), // operator
-      nullptr);         // threadpool
+      context.op.get(),              // operator
+      caffe2::xnnpack_threadpool()); // threadpool
 
   TORCH_INTERNAL_ASSERT(
       xnn_status_success == run_status,
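This hunk follows XNNPACK's two-phase protocol: a setup call binds the tensor pointers (and, in this version of the API, the threadpool) to the operator, then xnn_run_operator executes it. A condensed restatement of the hunk, with shapes passed in rather than read from tensors (names are illustrative; the fully connected hunk below follows the identical pattern):

#include <xnnpack.h>
#include "caffe2/utils/threadpool/ThreadPoolXNNPACK.h"

xnn_status run_conv(
    xnn_operator_t op,
    size_t batch, size_t height, size_t width,
    const float* input, float* output) {
  // Phase 1: bind tensors and the threadpool to the operator.
  const xnn_status setup_status = xnn_setup_convolution2d_nhwc_f32(
      op, batch, height, width, input, output,
      caffe2::xnnpack_threadpool()); // was nullptr before this commit
  if (setup_status != xnn_status_success) {
    return setup_status;
  }
  // Phase 2: execute, again on the shared pool (also was nullptr).
  return xnn_run_operator(op, caffe2::xnnpack_threadpool());
}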
@@ -72,15 +72,15 @@ Tensor run(
       Layout::ActivationND::batch(padded_input.sizes()), // Batch,
       padded_input.data_ptr<float>(),                    // input
       output.data_ptr<float>(),                          // output
-      nullptr);                                          // threadpool
+      caffe2::xnnpack_threadpool());                     // threadpool
 
   TORCH_CHECK(
       xnn_status_success == setup_status,
       "xnn_setup_fully_connected_nc_f32 failed!");
 
   const xnn_status run_status = xnn_run_operator(
-      context.op.get(), // operator
-      nullptr);         // threadpool
+      context.op.get(),              // operator
+      caffe2::xnnpack_threadpool()); // threadpool
 
   TORCH_INTERNAL_ASSERT(
       xnn_status_success == run_status,
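Both operator hunks call caffe2::xnnpack_threadpool() twice per run, so the function presumably returns a shared, lazily initialized pthreadpool_t rather than creating a pool per call. A hedged sketch of such an accessor (the body below is an assumption for illustration; the real declaration lives in caffe2/utils/threadpool/ThreadPoolXNNPACK.h):

#include <pthreadpool.h>

namespace caffe2 {
// Hypothetical sketch: one process-wide pool, created on first use.
pthreadpool_t xnnpack_threadpool() {
  // A threads_count of 0 lets pthreadpool size itself from the core count.
  static pthreadpool_t pool = pthreadpool_create(/*threads_count=*/0);
  return pool;
}
} // namespace caffe2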
@@ -434,6 +434,15 @@ if(USE_XNNPACK)
       "${CONFU_DEPENDENCIES_BINARY_DIR}/XNNPACK")
 
   set_property(TARGET XNNPACK PROPERTY POSITION_INDEPENDENT_CODE ON)
+  # Context: The pthreadpool_get_threads_count implementation built into PyTorch uses the
+  # implementation defined in caffe2/utils/threadpool/pthreadpool_impl.cc. That implementation
+  # assumes the pthreadpool* passed in is of type caffe2::ThreadPool and thus does a reinterpret cast.
+  # This is not valid when we create the pthreadpool via caffe2::xnnpack_threadpool, which is of a type
+  # compatible with the new pthreadpool interface and is used in PyTorch's XNNPACK integration.
+  # Thus all calls to pthreadpool_get_threads_count originating from XNNPACK must be routed
+  # to pthreadpool_get_threads_count_xnnpack, which does not do the aforementioned
+  # casting to caffe2::ThreadPool. Once the threadpools are unified, we will not need this.
+  target_compile_definitions(XNNPACK PRIVATE -Dpthreadpool_get_threads_count=pthreadpool_get_threads_count_xnnpack)
 endif()
 
 include_directories(SYSTEM ${XNNPACK_INCLUDE_DIR})
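The target_compile_definitions line relies on the preprocessor to rename a symbol: inside XNNPACK's translation units, every occurrence of the identifier pthreadpool_get_threads_count is textually replaced before compilation, so those calls declare and link against the _xnnpack variant instead. A minimal sketch of the mechanism (void* stands in for pthreadpool_t; the declarations are illustrative):

// sketch.cpp -- compile with:
//   g++ -c sketch.cpp -Dpthreadpool_get_threads_count=pthreadpool_get_threads_count_xnnpack
#include <cstddef>

// With the -D above, the preprocessor rewrites this declaration and the
// call below to pthreadpool_get_threads_count_xnnpack, i.e. the variant
// that does not reinterpret_cast its argument to caffe2::ThreadPool.
extern "C" size_t pthreadpool_get_threads_count(void* pool);

size_t query_threads(void* pool) {
  return pthreadpool_get_threads_count(pool);
}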