mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
According to the [APL documentation](https://developer.arm.com/documentation/101004/2404/General-information/Arm-Performance-Libraries-example-programs), libraries ending with `_mp` are OpenMP multi-threaded libraries.

When a project is compiled with MSVC and the `-openmp` flag, the `vcomp` library (the Visual C++ implementation of OpenMP) is used for runtime calls. However, the current APL implementation uses the `libomp.dll` (LLVM) variant. As a result, there are unexpected behaviors at runtime.

---

For example:

```python
import torch

# Create a sparse tensor
# Input (Sparse Tensor):
# [[0, 1],
#  [1, 0]]
indices = torch.tensor([[0, 1], [1, 0]])
values = torch.tensor([1, 1], dtype=torch.float32)
size = torch.Size([2, 2])

sparse_tensor = torch.sparse_coo_tensor(indices, values, size)

# Convert sparse tensor to dense tensor
dense_tensor = sparse_tensor.to_dense()

# Expected Output (Dense Tensor):
# [[0, 1],
#  [1, 0]]
print("\nDense Tensor:")
print(dense_tensor)
```

However, it prints unexpected outputs such as:

```python
# [[0, 11],
#  [10, 0]]
```

The issue arises because the following code does not function as expected at runtime:

https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/ParallelOpenMP.h#L30

```c++
// returns 1; however, since OpenMP is enabled it should return the total number of threads
int64_t num_threads = omp_get_num_threads();
```

---

At runtime, loading multiple OpenMP libraries (in this case `libomp` and `vcomp`) causes unexpected behaviour. So we changed the libraries from the `_mp` versions to the non-`_mp` versions and use `vcomp` for OpenMP calls.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/145215
Approved by: https://github.com/ozanMSFT, https://github.com/malfet
Co-authored-by: Ozan Aydin <148207261+ozanMSFT@users.noreply.github.com>
74 lines
2.4 KiB
CMake
74 lines
2.4 KiB
CMake
# - Find APL (Arm Performance Libraries)
|
|
#
|
|
# This module sets the following variables:
|
|
# APL_INCLUDE_SEARCH_PATHS - list of paths to search for APL include files
|
|
# APL_LIB_SEARCH_PATHS - list of paths to search for APL libraries
|
|
# APL_FOUND - set to true if APL is found
|
|
# APL_INCLUDE_DIR - path to include dir.
|
|
# APL_LIB_DIR - path to library dir.
|
|
# APL_LIBRARIES - list of libraries for base APL
|
|
|
|
# All APL search locations are derived from the ARMPL_DIR environment
# variable, which is read once at configure time.
set(APL_INCLUDE_SEARCH_PATHS $ENV{ARMPL_DIR}/include)  # where armpl.h is looked up
set(APL_LIB_SEARCH_PATHS $ENV{ARMPL_DIR}/lib)          # where the link libraries are looked up
set(APL_BIN_SEARCH_PATHS $ENV{ARMPL_DIR}/bin)          # where the runtime DLL is looked up
|
|
|
|
# Start optimistic: APL counts as found until one of the verification steps
# in this module turns the flag off.
set(APL_FOUND ON)

# Verify the include directory by locating armpl.h.
find_path(APL_INCLUDE_DIR
  NAMES armpl.h
  PATHS ${APL_INCLUDE_SEARCH_PATHS}
)
if(NOT APL_INCLUDE_DIR)
  message(STATUS "Could not verify APL include directory. Turning APL_FOUND off")
  set(APL_FOUND OFF)
endif()
|
|
|
|
# Verify the library directory: accept either the Windows library file
# (armpl_lp64.dll.lib) or the static archive (libarmpl_lp64.a).
find_path(APL_LIB_DIR
  NAMES
    armpl_lp64.dll.lib
    libarmpl_lp64.a
  PATHS ${APL_LIB_SEARCH_PATHS}
)
if(NOT APL_LIB_DIR)
  message(STATUS "Could not verify APL lib directory. Turning APL_FOUND off")
  set(APL_FOUND OFF)
endif()
|
|
|
|
# Check bin file
#
# APL_BIN_DIR is only consumed by the WIN32 step of this module that copies
# armpl_lp64.dll next to the installed libraries; the UNIX branch links the
# static archive through APL_LIB_DIR instead. A missing bin directory is
# therefore only treated as fatal on Windows, so that a UNIX installation is
# not rejected for lacking a file it never uses.
# NOTE(review): assumes UNIX APL installs keep libarmpl_lp64.a under lib/
# rather than bin/ - confirm against the supported ArmPL layouts.
find_path(APL_BIN_DIR
  NAMES
    armpl_lp64.dll
    libarmpl_lp64.a
  PATHS ${APL_BIN_SEARCH_PATHS}
)
if(WIN32 AND NOT APL_BIN_DIR)
  set(APL_FOUND OFF)
  message(STATUS "Could not verify APL bin directory. Turning APL_FOUND off")
endif()
|
|
|
|
# When every verification step succeeded:
#   * publish the libraries to link in APL_LIBRARIES,
#   * on Windows, add a build rule that stages armpl_lp64.dll into
#     <CMAKE_INSTALL_PREFIX>/lib (listed in APL_DLLS, driven by the
#     copy_apl_dlls target),
#   * run a small C probe and store in BLAS_USE_CBLAS_DOT whether
#     cblas_sdot, linked from APL, returns the expected dot product.
if(APL_FOUND)
  if(WIN32)
    set(APL_LIBRARIES
      "${APL_LIB_DIR}/armpl_lp64.dll.lib"
    )
    set(APL_DLLS
      "${CMAKE_INSTALL_PREFIX}/lib/armpl_lp64.dll"
    )
    # Copy the DLL found in APL_BIN_DIR into the install prefix at build time.
    # DEPENDS re-runs the copy when the source DLL changes; VERBATIM keeps
    # argument escaping identical across generators.
    add_custom_command(
      OUTPUT ${APL_DLLS}
      COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_INSTALL_PREFIX}/lib"
      COMMAND ${CMAKE_COMMAND} -E copy_if_different "${APL_BIN_DIR}/armpl_lp64.dll" "${CMAKE_INSTALL_PREFIX}/lib/armpl_lp64.dll"
      DEPENDS "${APL_BIN_DIR}/armpl_lp64.dll"
      VERBATIM
    )
    # ALL makes the copy part of the default build.
    add_custom_target(copy_apl_dlls ALL DEPENDS ${APL_DLLS})
  elseif(UNIX)
    set(APL_LIBRARIES
      "${APL_LIB_DIR}/libarmpl_lp64.a"
    )
  endif()

  message(STATUS "Found APL header: ${APL_INCLUDE_DIR}")
  message(STATUS "Found APL library: ${APL_LIB_DIR}")
  message(STATUS "APL_LIBRARIES: ${APL_LIBRARIES}")

  include(CheckCSourceRuns)
  include(CMakePushCheckState)

  # Find modules run in the caller's variable scope, so save the check state
  # and restore it afterwards; otherwise CMAKE_REQUIRED_LIBRARIES would leak
  # the APL libraries into every later configure check.
  cmake_push_check_state()
  set(CMAKE_REQUIRED_LIBRARIES ${APL_LIBRARIES})
  # The probe declares cblas_sdot with a full prototype: an empty parameter
  # list means "(void)" in C23, which would make the call below fail to
  # compile and silently turn BLAS_USE_CBLAS_DOT off.
  check_c_source_runs("
#include <stdlib.h>
#include <stdio.h>
float x[4] = { 1, 2, 3, 4 };
float y[4] = { .1, .01, .001, .0001 };
extern float cblas_sdot(int, const float *, int, const float *, int);
int main() {
double r = cblas_sdot(4, x, 1, y, 1);
exit((float)r != (float).1234);
}" BLAS_USE_CBLAS_DOT)
  cmake_pop_check_state()

  message(STATUS "BLAS_USE_CBLAS_DOT: ${BLAS_USE_CBLAS_DOT}")
endif()