mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Build fbgemm_gpu for TORCH_CUDA_ARCH_LIST=10.0 and CUDA 12.8 and 12.9 (#162544)
## Summary
- pytorch is not built for *a variants of SM architectures, due to non-portability. However, we need fbgemm_gpu kernels built for sm100a (see #162209)
## Changes
- **Setting USE_FBGEMM_GENAI for CUDA builds**: fbgemm_gpu builds for sm100a if using CUDA 12.8 or 12.9 ([source](2033a0a08f/.github/scripts/nova_dir.bash (L29-L32))), so I follow the same rule here.
- **Extra nvcc flags**: if USE_FBGEMM_GENAI and USE_CUDA are set, we add extra nvcc flags for sm100a
## Test plan
Test build:
```
echo $CUDA_HOME
/usr/local/cuda-12.9
export TORCH_CUDA_ARCH_LIST=10.0
python -m pip install --no-build-isolation -v -e .
```
Check build logs:
```
CMake Warning at CMakeLists.txt:901 (message):
Setting USE_FBGEMM_GENAI to ON, doing CUDA build for SM100a
```
Run unit tests:
- `pytest test/test_matmul_cuda.py -k test_mxfp8_scaled_grouped_mm`
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162544
Approved by: https://github.com/drisspg
This commit is contained in:
parent
f4aeceaa9d
commit
e0c910149c
|
|
@ -902,9 +902,9 @@ IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH)
|
|||
set(USE_FBGEMM_GENAI off)
|
||||
endif()
|
||||
|
||||
# Set USE_FBGEMM_GENAI to ON for CUDA build on SM100
|
||||
if(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0a")
|
||||
message(WARNING "Setting USE_FBGEMM_GENAI to ON for CUDA build on SM100")
|
||||
# Set USE_FBGEMM_GENAI to ON for CUDA build on SM100.
|
||||
if(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0" AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
|
||||
message(STATUS "Setting USE_FBGEMM_GENAI to ON, doing CUDA build for SM100a")
|
||||
set(USE_FBGEMM_GENAI ON)
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -265,6 +265,14 @@ IF(USE_FBGEMM_GENAI)
|
|||
"${FBGEMM_GENAI_SRCS}/cutlass_extensions/**/*.cu")
|
||||
list(FILTER fbgemm_genai_native_cuda_cu INCLUDE REGEX ${FBGEMM_CUTLASS_KERNELS_REGEX})
|
||||
|
||||
# PyTorch is not built for 10.0a in CI, due to lack of portability,
|
||||
# so we need to explicitly build these files for 10.0a.
|
||||
foreach(cu_file ${fbgemm_genai_native_cuda_cu})
|
||||
_BUILD_FOR_ADDITIONAL_ARCHS(
|
||||
"${cu_file}"
|
||||
"100a")
|
||||
endforeach()
|
||||
|
||||
file(GLOB_RECURSE fbgemm_genai_native_cuda_cpp
|
||||
"${FBGEMM_GENAI_SRCS}/common/*.cpp"
|
||||
)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user