Merge pull request #27537 from johnnynunez:patch-2

Refactor Blackwell #27537

In CUDA 13:
- 10.0 is B100/B200; the same applies on aarch64 (GB200)
- 10.3 is GB300
- 11.0 is Thor with the new OpenRM driver (moves to SBSA)
- 12.0 is RTX/RTX PRO
- 12.1 is Spark GB10

Thor was moved from 10.1 to 11.0, and Spark is 12.1.
Related patch: https://github.com/pytorch/pytorch/pull/156176
This commit is contained in:
Johnny 2025-08-20 15:29:31 +07:00 committed by GitHub
parent 32c6aee53d
commit 1e37d84e3a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -109,7 +109,7 @@ macro(ocv_initialize_nvidia_device_generations)
set(_arch_ampere "8.0;8.6")
set(_arch_lovelace "8.9")
set(_arch_hopper "9.0")
set(_arch_blackwell "10.0;12.0")
set(_arch_blackwell "10.0;10.3;11.0;12.0;12.1")
if(NOT CMAKE_CROSSCOMPILING)
list(APPEND _generations "Auto")
endif()
@@ -273,14 +273,15 @@ macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable)
endif()
if(NOT _nvcc_res EQUAL 0)
message(STATUS "CUDA: Automatic detection of CUDA generation failed. Going to build for all known architectures")
# TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7) Thor (10.1)
# TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7) Thor (11.0) Spark (12.1)
ocv_filter_available_architecture(${nvcc_executable} __cuda_arch_bin
5.3
6.2
7.2
7.0
8.7
10.1
11.0
12.1
)
else()
set(__cuda_arch_bin "${_nvcc_out}")