Enable threading for XNNPACK ops. (#34547)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34547

This enables threading by passing a threadpool to xnnpack ops.

Test Plan:
python test/test_xnnpack_integration.py

Imported from OSS

Differential Revision: D20370553

fbshipit-source-id: 4db08e73f8c69b9e722b0e11a00621c4e229a31a
This commit is contained in:
Kimish Patel 2020-03-14 12:48:24 -07:00 committed by Facebook GitHub Bot
parent 4da5569300
commit 84bd71dbd4
5 changed files with 17 additions and 7 deletions

View File

@ -423,7 +423,7 @@ if(USE_PYTORCH_QNNPACK)
endif()
if(USE_XNNPACK)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_XNNPACK")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_XNNPACK -DUSE_INTERNAL_THREADPOOL_IMPL")
endif()
# ---[ Whitelist file if whitelist is specified

View File

@ -5,6 +5,7 @@
#ifdef USE_XNNPACK
#include <xnnpack.h>
#include "caffe2/utils/threadpool/ThreadPoolXNNPACK.h"
namespace at {
namespace native {

View File

@ -110,15 +110,15 @@ Tensor run(
padded_input_nhwc.size(Layout::Activation4D::width), // input_width
padded_input_nhwc.data_ptr<float>(), // input
output.data_ptr<float>(), // output
nullptr); // threadpool
caffe2::xnnpack_threadpool()); // threadpool
TORCH_CHECK(
xnn_status_success == setup_status,
"xnn_setup_convolution2d_nhwc_f32 failed!");
const xnn_status run_status = xnn_run_operator(
context.op.get(), // operator
nullptr); // threadpool
context.op.get(), // operator
caffe2::xnnpack_threadpool()); // threadpool
TORCH_INTERNAL_ASSERT(
xnn_status_success == run_status,

View File

@ -72,15 +72,15 @@ Tensor run(
Layout::ActivationND::batch(padded_input.sizes()), // Batch,
padded_input.data_ptr<float>(), // input
output.data_ptr<float>(), // output
nullptr); // threadpool
caffe2::xnnpack_threadpool()); // threadpool
TORCH_CHECK(
xnn_status_success == setup_status,
"xnn_setup_fully_connected_nc_f32 failed!");
const xnn_status run_status = xnn_run_operator(
context.op.get(), // operator
nullptr); // threadpool
context.op.get(), // operator
caffe2::xnnpack_threadpool()); // threadpool
TORCH_INTERNAL_ASSERT(
xnn_status_success == run_status,

View File

@ -434,6 +434,15 @@ if(USE_XNNPACK)
"${CONFU_DEPENDENCIES_BINARY_DIR}/XNNPACK")
set_property(TARGET XNNPACK PROPERTY POSITION_INDEPENDENT_CODE ON)
# Context: pthreadpool_get_threads_count implementation that is built in pytorch, uses
# the implementation defined in caffe2/utils/threadpool/pthreadpool_impl.cc. This implementation
# assumes that the pthreadpool* passed is of type caffe2::ThreadPool and thus does a reinterpret cast.
# This is not valid when we create pthreadpool via caffe2::xnnpack_threadpool, which is of type
# compatible with new pthreadpool interface and is used in PT's XNNPACK integration.
# Thus all the calls for pthreadpool_get_threads_count originating from XNNPACK must be routed
# appropriately to pthreadpool_get_threads_count_xnnpack, which does not do the aforementioned
# casting to caffe2::ThreadPool. Once the threadpools are unified, we will not need this.
target_compile_definitions(XNNPACK PRIVATE -Dpthreadpool_get_threads_count=pthreadpool_get_threads_count_xnnpack)
endif()
include_directories(SYSTEM ${XNNPACK_INCLUDE_DIR})