fix: multi-cuda version skew (#12318)

Ensure that in a version skewed multi-cuda setup we use the lowest version for all GPUs
This commit is contained in:
Daniel Hiltgen 2025-09-17 13:05:09 -07:00 committed by GitHub
parent 564b558c92
commit 9c5bf342bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 27 additions and 20 deletions

View File

@ -16,7 +16,7 @@ import (
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices. // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK") var CudaTegra string = os.Getenv("JETSON_JETPACK")
func cudaVariant(gpuInfo CudaGPUInfo) string { func cudaVariant(gpuInfos []CudaGPUInfo) string {
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" { if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
if CudaTegra != "" { if CudaTegra != "" {
ver := strings.Split(CudaTegra, ".") ver := strings.Split(CudaTegra, ".")
@ -45,20 +45,19 @@ func cudaVariant(gpuInfo CudaGPUInfo) string {
} }
} }
// Check GPU compute capability FIRST // Check GPU compute capability FIRST, lowest common denominator if multi-gpu
isOldGPU := gpuInfo.computeMajor < 7 || (gpuInfo.computeMajor == 7 && gpuInfo.computeMinor < 5) for _, gpuInfo := range gpuInfos {
if isOldGPU { if gpuInfo.computeMajor < 7 || (gpuInfo.computeMajor == 7 && gpuInfo.computeMinor < 5) {
// GPU is Pascal or older (CC <= 7.4) - use CUDA v12 (supports CC 6.1) // GPU is Pascal or older (CC <= 7.4) - use CUDA v12 (supports CC 6.1)
return "v12" return "v12"
}
} }
// GPU is Turing or newer (CC >= 7.5) - can use newer CUDA // GPU is Turing or newer (CC >= 7.5) - can use newer CUDA
if gpuInfo.DriverMajor < 13 { if len(gpuInfos) > 0 && gpuInfos[0].DriverMajor < 13 {
// The detected driver is older than 580 (Aug 2025) // The detected driver is older than 580 (Aug 2025)
// Warn if their CC is compatible with v13 and they should upgrade their driver to get better performance // Warn if their CC is compatible with v13 and they should upgrade their driver to get better performance
if !isOldGPU { slog.Warn("old CUDA driver detected - please upgrade to a newer driver for best performance", "version", fmt.Sprintf("%d.%d", gpuInfos[0].DriverMajor, gpuInfos[0].DriverMinor))
slog.Warn("old CUDA driver detected - please upgrade to a newer driver for best performance", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor))
}
return "v12" return "v12"
} }
return "v13" return "v13"

View File

@ -284,18 +284,8 @@ func GetGPUInfo() GpuInfoList {
gpuInfo.MinimumMemory = cudaMinimumMemory gpuInfo.MinimumMemory = cudaMinimumMemory
gpuInfo.DriverMajor = driverMajor gpuInfo.DriverMajor = driverMajor
gpuInfo.DriverMinor = driverMinor gpuInfo.DriverMinor = driverMinor
variant := cudaVariant(gpuInfo)
// Start with our bundled libraries
if variant != "" {
variantPath := filepath.Join(LibOllamaPath, "cuda_"+variant)
if _, err := os.Stat(variantPath); err == nil {
// Put the variant directory first in the search path to avoid runtime linking to the wrong library
gpuInfo.DependencyPath = append([]string{variantPath}, gpuInfo.DependencyPath...)
}
}
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.Variant = variant
if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) { if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) {
unsupportedGPUs = append(unsupportedGPUs, unsupportedGPUs = append(unsupportedGPUs,
@ -333,6 +323,24 @@ func GetGPUInfo() GpuInfoList {
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does... // TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
cudaGPUs = append(cudaGPUs, gpuInfo) cudaGPUs = append(cudaGPUs, gpuInfo)
} }
// Second pass on NVIDIA GPUs to set lowest common denominator variant and DependencyPaths
variant := cudaVariant(cudaGPUs)
var variantPath string
// Start with our bundled libraries
if variant != "" {
variantPath = filepath.Join(LibOllamaPath, "cuda_"+variant)
if _, err := os.Stat(variantPath); err != nil {
variantPath = ""
}
}
for i := range cudaGPUs {
cudaGPUs[i].Variant = variant
if variantPath != "" {
// Put the variant directory first in the search path to avoid runtime linking to the wrong library
cudaGPUs[i].DependencyPath = append([]string{variantPath}, cudaGPUs[i].DependencyPath...)
}
}
} }
// Intel // Intel