**Motivation:** In PyTorch, ATen vectorization supports multiple platforms (including x86 and Arm) and multiple data types. It provides a generic Vector (Vec) type that lets the programmer write code packing primitives (such as floats) into 256-bit and 512-bit registers, and it can easily be extended to other ISAs by adding more VecISA subclasses. **Reference Link:** https://github.com/pytorch/pytorch/tree/main/aten/src/ATen/cpu/vec **This PR:**
* Our goal with this contribution is to add an SVE backend for Vec in the ATen CPU vectorization, which benefits any Arm CPU that supports SVE.
* More about the SVE ISA for Arm: [https://developer.arm.com/Architectures/Scalable Vector Extensions](https://developer.arm.com/Architectures/Scalable%20Vector%20Extensions)
* We use the Arm C Language Extensions for SVE (https://developer.arm.com/documentation/102699/0100/Optimizing-with-intrinsics) to accelerate various operators in the SVE backend for Vec.
* Currently we add support only for the SVE ISA with a vector length of 256 bits (SVE 256); we plan to extend this SVE support to other vector lengths in the future.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/119571
Approved by: https://github.com/malfet, https://github.com/snadampal
Co-authored-by: Divya Kotadiya <divya.kotadiya@fujitsu.com>
# This ill-named file does a number of things:
# - Installs Caffe2 header files (this has nothing to do with code generation)
# - Configures caffe2/core/macros.h
# - Creates an ATen target for its generated C++ files and adds it
#   as a dependency
# - Reads build lists defined in build_variables.bzl

################################################################################
# Helper functions
################################################################################

function(filter_list output input)
  unset(result)
  foreach(filename ${${input}})
    foreach(pattern ${ARGN})
      if("${filename}" MATCHES "${pattern}")
        list(APPEND result "${filename}")
      endif()
    endforeach()
  endforeach()
  set(${output} ${result} PARENT_SCOPE)
endfunction()
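
# Illustrative usage (hypothetical list names): keep only the CUDA sources.
#   set(all_srcs "Add.cpp;GemmKernel.cu;Copy.cu")
#   filter_list(cuda_srcs all_srcs "\\.cu$")
#   # cuda_srcs == "GemmKernel.cu;Copy.cu"
# Note that `input` is passed by name: the function dereferences ${${input}}.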

function(filter_list_exclude output input)
  unset(result)
  foreach(filename ${${input}})
    # A filename is kept only if it matches none of the exclusion patterns;
    # appending inside the pattern loop would add the same filename once per
    # non-matching pattern, so track the match in a flag instead.
    set(excluded FALSE)
    foreach(pattern ${ARGN})
      if("${filename}" MATCHES "${pattern}")
        set(excluded TRUE)
      endif()
    endforeach()
    if(NOT excluded)
      list(APPEND result "${filename}")
    endif()
  endforeach()
  set(${output} ${result} PARENT_SCOPE)
endfunction()
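
# Illustrative usage (hypothetical list names): drop test files from a list.
#   filter_list_exclude(prod_srcs all_srcs "_test\\.cpp$")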

################################################################################

# ---[ Write the macros file
configure_file(
    ${CMAKE_CURRENT_LIST_DIR}/../caffe2/core/macros.h.in
    ${CMAKE_BINARY_DIR}/caffe2/core/macros.h)
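
# configure_file substitutes @VAR@ references and #cmakedefine lines from the
# current CMake variables. For example, a template line such as (illustrative)
#   #cmakedefine CAFFE2_USE_MKL
# is emitted as "#define CAFFE2_USE_MKL" when the variable is set at configure
# time, and as a commented-out #undef otherwise.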

# ---[ Installing the header files
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../caffe2
        DESTINATION include
        FILES_MATCHING PATTERN "*.h")
if(NOT INTERN_BUILD_ATEN_OPS)
  install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/core
          DESTINATION include/ATen
          FILES_MATCHING PATTERN "*.h")
endif()
install(FILES ${CMAKE_BINARY_DIR}/caffe2/core/macros.h
        DESTINATION include/caffe2/core)
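
# The resulting install layout places the headers under the include prefix,
# e.g. <prefix>/include/caffe2/... and <prefix>/include/caffe2/core/macros.h.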

# ---[ ATen specific
if(INTERN_BUILD_ATEN_OPS)
  if(MSVC)
    set(OPT_FLAG "/fp:strict ")
  else(MSVC)
    set(OPT_FLAG "-O3 ")
    if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
      set(OPT_FLAG " ")
    endif()
  endif(MSVC)

  if(NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
    set_source_files_properties(${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/MapAllocator.cpp PROPERTIES COMPILE_FLAGS "-fno-openmp")
  endif()

  file(GLOB_RECURSE all_python "${CMAKE_CURRENT_LIST_DIR}/../torchgen/*.py")

  # RowwiseScaledMM.cu requires sm_90a flags
  if(USE_CUDA)
    set(ROWWISE_SCALED_MM_FILE "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/RowwiseScaledMM.cu")

    # Get existing arch flags
    torch_cuda_get_nvcc_gencode_flag(EXISTING_ARCH_FLAGS)

    # Check NVCC version and existing arch flags
    if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND
        EXISTING_ARCH_FLAGS MATCHES ".*compute_90.*")
      set_source_files_properties(${ROWWISE_SCALED_MM_FILE}
        PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a")
    endif()
  endif()
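
  # torch_cuda_get_nvcc_gencode_flag returns the -gencode flags selected for
  # the build, e.g. (illustrative):
  #   -gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_90,code=sm_90
  # so the MATCHES check above adds the sm_90a variant only when compute_90 is
  # already among the targets and NVCC is new enough to understand it.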

  set(GEN_ROCM_FLAG)
  if(USE_ROCM)
    set(GEN_ROCM_FLAG --rocm)
  endif()

  set(GEN_MPS_FLAG)
  if(USE_MPS)
    set(GEN_MPS_FLAG --mps)
  endif()

  set(CUSTOM_BUILD_FLAGS)
  if(INTERN_BUILD_MOBILE)
    if(USE_VULKAN)
      list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU Vulkan)
    else()
      list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU)
    endif()
  endif()

  if(SELECTED_OP_LIST)
    if(TRACING_BASED)
      message(STATUS "Running tracing-based selective build given operator list: ${SELECTED_OP_LIST}")
      list(APPEND CUSTOM_BUILD_FLAGS
        --op_selection_yaml_path ${SELECTED_OP_LIST})
    elseif(NOT STATIC_DISPATCH_BACKEND)
      message(WARNING
        "Only tracing-based selective build supports dynamic dispatch.\n"
        "Switching to STATIC_DISPATCH_BACKEND=CPU."
      )
      set(STATIC_DISPATCH_BACKEND CPU)
    endif()
  endif()
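
  # Illustrative invocation of a tracing-based selective build (the YAML is
  # produced by a model tracer; the path below is hypothetical):
  #   cmake -DSELECTED_OP_LIST=/path/to/selected_operators.yaml \
  #         -DTRACING_BASED=ON ...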

  if(STATIC_DISPATCH_BACKEND)
    message(STATUS "Custom build with static dispatch backends: ${STATIC_DISPATCH_BACKEND}")
    list(LENGTH STATIC_DISPATCH_BACKEND len)
    list(APPEND CUSTOM_BUILD_FLAGS
      --static_dispatch_backend ${STATIC_DISPATCH_BACKEND})
  endif()

  # Codegen unboxing
  if(USE_LIGHTWEIGHT_DISPATCH)
    file(GLOB_RECURSE all_unboxing_script "${CMAKE_CURRENT_LIST_DIR}/../tools/jit/*.py")
    list(APPEND CUSTOM_BUILD_FLAGS --skip_dispatcher_op_registration)
    set(GEN_UNBOXING_COMMAND
      "${Python_EXECUTABLE}" -m tools.jit.gen_unboxing
      --source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
      --install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
    )
    if(SELECTED_OP_LIST)
      list(APPEND GEN_UNBOXING_COMMAND
        --TEST_ONLY_op_registration_allowlist_yaml_path "${SELECTED_OP_LIST}")
    endif()
    set("GEN_UNBOXING_COMMAND_sources"
      ${GEN_UNBOXING_COMMAND}
      --output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
    )
    message(STATUS "Generating sources for lightweight dispatch")
    execute_process(
      COMMAND ${GEN_UNBOXING_COMMAND_sources} --dry-run
      RESULT_VARIABLE RETURN_VALUE
      WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
    )
    if(NOT RETURN_VALUE EQUAL 0)
      message(FATAL_ERROR "Failed to get generated_unboxing_sources list")
    endif()

    include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake")
    add_custom_command(
      COMMENT "Generating ATen unboxing sources"
      OUTPUT
        ${generated_unboxing_sources}
        ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
      COMMAND ${GEN_UNBOXING_COMMAND_sources}
      DEPENDS ${all_unboxing_script} ${sources_templates}
        ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
        ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
      WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
    )
  else()  # Otherwise do not generate or include sources into build.
    set(generated_unboxing_sources "")
  endif()

  set(GEN_PER_OPERATOR_FLAG)
  if(USE_PER_OPERATOR_HEADERS)
    list(APPEND GEN_PER_OPERATOR_FLAG "--per-operator-headers")
  endif()

  set(GEN_COMMAND
    "${Python_EXECUTABLE}" -m torchgen.gen
    --source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
    --install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
    ${GEN_PER_OPERATOR_FLAG}
    ${GEN_ROCM_FLAG}
    ${GEN_MPS_FLAG}
    ${CUSTOM_BUILD_FLAGS}
  )
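
  # With the flags above, the effective codegen invocation is along the lines
  # of (illustrative):
  #   python -m torchgen.gen \
  #     --source-path aten/src/ATen \
  #     --install_dir <build>/aten/src/ATen \
  #     [--per-operator-headers] [--rocm] [--mps] [custom build flags...]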

  file(GLOB_RECURSE headers_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.h")
  file(GLOB_RECURSE sources_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.cpp")
  set(declarations_yaml_templates "")

  foreach(gen_type "headers" "sources" "declarations_yaml")
    # The codegen outputs may change dynamically as PyTorch is developed, but
    # add_custom_command needs its OUTPUT list to be known at configure time;
    # only its inputs may be dynamic.
    #
    # We work around this by generating a .cmake file which is
    # included below to set the list of output files. If that file
    # ever changes then cmake will be re-run automatically because it
    # was included and so we get fully dynamic outputs.

    set("GEN_COMMAND_${gen_type}"
      ${GEN_COMMAND}
      --generate ${gen_type}
      --output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
    )

    # Dry run to bootstrap the output variables
    execute_process(
      COMMAND ${GEN_COMMAND_${gen_type}} --dry-run
      RESULT_VARIABLE RETURN_VALUE
      WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
    )

    if(NOT RETURN_VALUE EQUAL 0)
      message(FATAL_ERROR "Failed to get generated_${gen_type} list")
    endif()

    include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake")
    include("${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake")
    include("${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake")
    include("${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake")
    include("${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake")
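
    # Each included file is expected to define the corresponding list variable
    # consumed below, e.g. (illustrative):
    #   set(generated_headers
    #       ${CMAKE_BINARY_DIR}/aten/src/ATen/Functions.h
    #       ...)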

    message(STATUS "${gen_type} outputs: ${gen_outputs}")

    add_custom_command(
      COMMENT "Generating ATen ${gen_type}"
      OUTPUT
        ${generated_${gen_type}}
        ${cuda_generated_${gen_type}}
        ${core_generated_${gen_type}}
        ${cpu_vec_generated_${gen_type}}
        ${ops_generated_${gen_type}}
        ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
        ${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake
        ${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake
        ${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake
        ${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake
      COMMAND ${GEN_COMMAND_${gen_type}}
      DEPENDS ${all_python} ${${gen_type}_templates}
        ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
        ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
      WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
    )
  endforeach()

  # Generated headers used from a CUDA (.cu) file are not tracked correctly in
  # CMake. As a workaround, we make libATen.so depend explicitly on building
  # the generated ATen files.
  add_custom_target(ATEN_CPU_FILES_GEN_TARGET DEPENDS
    ${generated_headers} ${core_generated_headers} ${cpu_vec_generated_headers} ${ops_generated_headers}
    ${generated_sources} ${core_generated_sources} ${cpu_vec_generated_sources} ${ops_generated_sources}
    ${generated_declarations_yaml} ${generated_unboxing_sources})
  add_custom_target(ATEN_CUDA_FILES_GEN_TARGET DEPENDS
    ${cuda_generated_headers} ${cuda_generated_sources})
  add_library(ATEN_CPU_FILES_GEN_LIB INTERFACE)
  add_library(ATEN_CUDA_FILES_GEN_LIB INTERFACE)
  add_dependencies(ATEN_CPU_FILES_GEN_LIB ATEN_CPU_FILES_GEN_TARGET)
  add_dependencies(ATEN_CUDA_FILES_GEN_LIB ATEN_CUDA_FILES_GEN_TARGET)
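
  # Consumers pick up the codegen dependency by linking the INTERFACE library,
  # e.g. (illustrative target name):
  #   target_link_libraries(my_target PRIVATE ATEN_CPU_FILES_GEN_LIB)
  # which ensures the generated files exist before my_target is compiled.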

  if(USE_PER_OPERATOR_HEADERS)
    target_compile_definitions(ATEN_CPU_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
    target_compile_definitions(ATEN_CUDA_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
  endif()

  # Handle source files that need to be compiled multiple times for
  # different vectorization options
  file(GLOB cpu_kernel_cpp_in "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/cpu/*.cpp" "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/quantized/cpu/kernels/*.cpp")

  list(APPEND CPU_CAPABILITY_NAMES "DEFAULT")
  list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}")

  if(CXX_AVX512_FOUND)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX512_CPU_DEFINITION")
    list(APPEND CPU_CAPABILITY_NAMES "AVX512")
    if(MSVC)
      list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
    else(MSVC)
      list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx512f -mavx512bw -mavx512vl -mavx512dq -mfma")
    endif(MSVC)
  endif(CXX_AVX512_FOUND)

  if(CXX_AVX2_FOUND)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX2_CPU_DEFINITION")

    # Some versions of GCC pessimistically split unaligned load and store
    # instructions when using the default tuning. This is a bad choice on
    # new Intel and AMD processors so we disable it when compiling with AVX2.
    # See https://stackoverflow.com/questions/52626726/why-doesnt-gcc-resolve-mm256-loadu-pd-as-single-vmovupd#tab-top
    check_cxx_compiler_flag("-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" COMPILER_SUPPORTS_NO_AVX256_SPLIT)
    if(COMPILER_SUPPORTS_NO_AVX256_SPLIT)
      set(CPU_NO_AVX256_SPLIT_FLAGS "-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store")
    endif(COMPILER_SUPPORTS_NO_AVX256_SPLIT)

    list(APPEND CPU_CAPABILITY_NAMES "AVX2")
    # ATEN_AVX512_256, when set in the environment, is expected to be TRUE on
    # a machine where AVX512 is available; flags are appended only in that
    # case, keeping the names/flags lists in lockstep.
    if(DEFINED ENV{ATEN_AVX512_256})
      if($ENV{ATEN_AVX512_256} MATCHES "TRUE")
        if(CXX_AVX512_FOUND)
          message("-- ATen AVX2 kernels will use 32 ymm registers")
          if(MSVC)
            list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
          else(MSVC)
            list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=native ${CPU_NO_AVX256_SPLIT_FLAGS}")
          endif(MSVC)
        endif(CXX_AVX512_FOUND)
      endif()
    else()
      if(MSVC)
        list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX2")
      else(MSVC)
        list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx2 -mfma -mf16c ${CPU_NO_AVX256_SPLIT_FLAGS}")
      endif(MSVC)
    endif()
  endif(CXX_AVX2_FOUND)

  if(CXX_VSX_FOUND)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_VSX_CPU_DEFINITION")
    list(APPEND CPU_CAPABILITY_NAMES "VSX")
    list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_VSX_FLAGS}")
  endif(CXX_VSX_FOUND)

  if(CXX_ZVECTOR_FOUND)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ZVECTOR_CPU_DEFINITION")
    list(APPEND CPU_CAPABILITY_NAMES "ZVECTOR")
    list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_ZVECTOR_FLAGS}")
  endif(CXX_ZVECTOR_FOUND)

  if(CXX_SVE_FOUND)
    if(CXX_SVE256_FOUND)
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_SVE_CPU_DEFINITION -DHAVE_SVE256_CPU_DEFINITION")
      list(APPEND CPU_CAPABILITY_NAMES "SVE256")
      if("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
        list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -O2 -march=armv8.2-a+sve -DCPU_CAPABILITY_SVE -msve-vector-bits=256")
      else()
        list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=armv8.2-a+sve -DCPU_CAPABILITY_SVE -msve-vector-bits=256")
      endif()
    endif(CXX_SVE256_FOUND)
  endif(CXX_SVE_FOUND)
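
  # At this point CPU_CAPABILITY_NAMES and CPU_CAPABILITY_FLAGS are parallel
  # lists pairing each capability with its compile flags, e.g. on an x86-64
  # AVX2 machine (illustrative):
  #   CPU_CAPABILITY_NAMES = DEFAULT;AVX2
  #   CPU_CAPABILITY_FLAGS = "-O3 ";"-O3  -mavx2 -mfma -mf16c ..."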

  list(LENGTH CPU_CAPABILITY_NAMES NUM_CPU_CAPABILITY_NAMES)
  math(EXPR NUM_CPU_CAPABILITY_NAMES "${NUM_CPU_CAPABILITY_NAMES}-1")

  # The sources list might get reordered later based on the capabilities.
  # See NOTE [ Linking AVX and non-AVX files ]
  foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES})
    function(process_vec NAME)
      list(GET CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY)
      set(NEW_IMPL ${CMAKE_BINARY_DIR}/aten/src/ATen/${NAME}.${CPU_CAPABILITY}.cpp)
      configure_file("${PROJECT_SOURCE_DIR}/cmake/IncludeSource.cpp.in" ${NEW_IMPL})
      set(cpu_kernel_cpp ${NEW_IMPL} ${cpu_kernel_cpp} PARENT_SCOPE)  # Create list of copies
      list(GET CPU_CAPABILITY_FLAGS ${i} FLAGS)
      if(MSVC)
        set(EXTRA_FLAGS "/DCPU_CAPABILITY=${CPU_CAPABILITY} /DCPU_CAPABILITY_${CPU_CAPABILITY}")
      else(MSVC)
        set(EXTRA_FLAGS "-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}")
      endif(MSVC)
      # Disable certain warnings for GCC-9.X
      if(CMAKE_COMPILER_IS_GNUCXX)
        if(("${NAME}" STREQUAL "native/cpu/GridSamplerKernel.cpp") AND ("${CPU_CAPABILITY}" STREQUAL "DEFAULT"))
          # See https://github.com/pytorch/pytorch/issues/38855
          set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-uninitialized")
        endif()
        if("${NAME}" STREQUAL "native/quantized/cpu/kernels/QuantizedOpKernels.cpp")
          # See https://github.com/pytorch/pytorch/issues/38854
          set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-deprecated-copy")
        endif()
      endif()
      set_source_files_properties(${NEW_IMPL} PROPERTIES COMPILE_FLAGS "${FLAGS} ${EXTRA_FLAGS}")
    endfunction()
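
    # IncludeSource.cpp.in is a small stub that is configured once per
    # capability; conceptually each generated ${NAME}.${CPU_CAPABILITY}.cpp
    # reduces to something like (illustrative; the real template may differ):
    #   #include "<source root>/aten/src/ATen/${NAME}"
    # so the same kernel body is compiled repeatedly under different flags and
    # a distinct CPU_CAPABILITY preprocessor identity.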

    foreach(IMPL ${cpu_kernel_cpp_in})
      file(RELATIVE_PATH NAME "${PROJECT_SOURCE_DIR}/aten/src/ATen/" "${IMPL}")
      process_vec("${NAME}")
    endforeach()
    foreach(IMPL ${cpu_vec_generated_sources})
      file(RELATIVE_PATH NAME "${CMAKE_BINARY_DIR}/aten/src/ATen/" "${IMPL}")
      process_vec("${NAME}")
    endforeach()
  endforeach()
  list(APPEND ATen_CPU_SRCS ${cpu_kernel_cpp})
endif()

function(append_filelist name outputvar)
  set(_rootdir "${Torch_SOURCE_DIR}/")
  # configure_file adds its input to the list of CMAKE_RERUN dependencies,
  # so CMake re-runs automatically whenever build_variables.bzl changes.
  configure_file(
      ${PROJECT_SOURCE_DIR}/build_variables.bzl
      ${PROJECT_BINARY_DIR}/caffe2/build_variables.bzl)
  execute_process(
    COMMAND "${Python_EXECUTABLE}" -c
            "exec(open('${PROJECT_SOURCE_DIR}/build_variables.bzl').read());print(';'.join(['${_rootdir}' + x for x in ${name}]))"
    WORKING_DIRECTORY "${_rootdir}"
    RESULT_VARIABLE _retval
    OUTPUT_VARIABLE _tempvar)
  if(NOT _retval EQUAL 0)
    message(FATAL_ERROR "Failed to fetch filelist ${name} from build_variables.bzl")
  endif()
  string(REPLACE "\n" "" _tempvar "${_tempvar}")
  list(APPEND ${outputvar} ${_tempvar})
  set(${outputvar} "${${outputvar}}" PARENT_SCOPE)
endfunction()
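
# Illustrative usage: pull a list defined in build_variables.bzl into a CMake
# variable (the list name below is assumed, not guaranteed to exist):
#   append_filelist("libtorch_core_sources" TORCH_SRCS)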

set(NUM_CPU_CAPABILITY_NAMES ${NUM_CPU_CAPABILITY_NAMES} PARENT_SCOPE)
set(CPU_CAPABILITY_FLAGS ${CPU_CAPABILITY_FLAGS} PARENT_SCOPE)