Set number of threads to be 1 for ARM (#97482) (#98267)

Summary: In a highly multi-threaded environment, setting the number of threads to match hardware_concurrency leads to high contention. The x86 path actually ends up using a different path (the MKL path), which results in using 1 thread on x86 as well. Pull Request resolved: https://github.com/pytorch/pytorch/pull/98267 Approved by: https://github.com/malfet
2025-12-07 12:21:27 +01:00 · 2023-04-04 21:24:45 +00:00 · 2023-04-04 21:24:45 +00:00 · 51a978fe7b
commit 51a978fe7b
parent aaae588727
1 changed files with 4 additions and 0 deletions
--- a/aten/src/ATen/ParallelCommon.cpp
+++ b/aten/src/ATen/ParallelCommon.cpp
@@ -101,7 +101,11 @@ int intraop_default_num_threads() {
  size_t nthreads = get_env_num_threads("OMP_NUM_THREADS", 0);
  nthreads = get_env_num_threads("MKL_NUM_THREADS", nthreads);
  if (nthreads == 0) {
+#if defined(FBCODE_CAFFE2) && defined(__aarch64__)
+    nthreads = 1;
+#else
    nthreads = TaskThreadPoolBase::defaultNumThreads();
+#endif
  }
  return nthreads;
 #endif