mirror of
https://github.com/zebrajr/ollama.git
synced 2025-12-06 12:19:56 +01:00
fix: multi-cuda version skew (#12318)
Ensure that in a version-skewed multi-CUDA setup we use the lowest CUDA version for all GPUs.
This commit is contained in:
parent
564b558c92
commit
9c5bf342bc
|
|
@ -16,7 +16,7 @@ import (
|
||||||
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
||||||
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||||
|
|
||||||
func cudaVariant(gpuInfo CudaGPUInfo) string {
|
func cudaVariant(gpuInfos []CudaGPUInfo) string {
|
||||||
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
||||||
if CudaTegra != "" {
|
if CudaTegra != "" {
|
||||||
ver := strings.Split(CudaTegra, ".")
|
ver := strings.Split(CudaTegra, ".")
|
||||||
|
|
@ -45,20 +45,19 @@ func cudaVariant(gpuInfo CudaGPUInfo) string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check GPU compute capability FIRST
|
// Check GPU compute capability FIRST, lowest common denominator if multi-gpu
|
||||||
isOldGPU := gpuInfo.computeMajor < 7 || (gpuInfo.computeMajor == 7 && gpuInfo.computeMinor < 5)
|
for _, gpuInfo := range gpuInfos {
|
||||||
if isOldGPU {
|
if gpuInfo.computeMajor < 7 || (gpuInfo.computeMajor == 7 && gpuInfo.computeMinor < 5) {
|
||||||
// GPU is Pascal or older (CC <= 7.4) - use CUDA v12 (supports CC 6.1)
|
// GPU is Pascal or older (CC <= 7.4) - use CUDA v12 (supports CC 6.1)
|
||||||
return "v12"
|
return "v12"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GPU is Turing or newer (CC >= 7.5) - can use newer CUDA
|
// GPU is Turing or newer (CC >= 7.5) - can use newer CUDA
|
||||||
if gpuInfo.DriverMajor < 13 {
|
if len(gpuInfos) > 0 && gpuInfos[0].DriverMajor < 13 {
|
||||||
// The detected driver is older than 580 (Aug 2025)
|
// The detected driver is older than 580 (Aug 2025)
|
||||||
// Warn if their CC is compatible with v13 and they should upgrade their driver to get better performance
|
// Warn if their CC is compatible with v13 and they should upgrade their driver to get better performance
|
||||||
if !isOldGPU {
|
slog.Warn("old CUDA driver detected - please upgrade to a newer driver for best performance", "version", fmt.Sprintf("%d.%d", gpuInfos[0].DriverMajor, gpuInfos[0].DriverMinor))
|
||||||
slog.Warn("old CUDA driver detected - please upgrade to a newer driver for best performance", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor))
|
|
||||||
}
|
|
||||||
return "v12"
|
return "v12"
|
||||||
}
|
}
|
||||||
return "v13"
|
return "v13"
|
||||||
|
|
|
||||||
|
|
@ -284,18 +284,8 @@ func GetGPUInfo() GpuInfoList {
|
||||||
gpuInfo.MinimumMemory = cudaMinimumMemory
|
gpuInfo.MinimumMemory = cudaMinimumMemory
|
||||||
gpuInfo.DriverMajor = driverMajor
|
gpuInfo.DriverMajor = driverMajor
|
||||||
gpuInfo.DriverMinor = driverMinor
|
gpuInfo.DriverMinor = driverMinor
|
||||||
variant := cudaVariant(gpuInfo)
|
|
||||||
|
|
||||||
// Start with our bundled libraries
|
|
||||||
if variant != "" {
|
|
||||||
variantPath := filepath.Join(LibOllamaPath, "cuda_"+variant)
|
|
||||||
if _, err := os.Stat(variantPath); err == nil {
|
|
||||||
// Put the variant directory first in the search path to avoid runtime linking to the wrong library
|
|
||||||
gpuInfo.DependencyPath = append([]string{variantPath}, gpuInfo.DependencyPath...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||||
gpuInfo.Variant = variant
|
|
||||||
|
|
||||||
if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) {
|
if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) {
|
||||||
unsupportedGPUs = append(unsupportedGPUs,
|
unsupportedGPUs = append(unsupportedGPUs,
|
||||||
|
|
@ -333,6 +323,24 @@ func GetGPUInfo() GpuInfoList {
|
||||||
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
|
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
|
||||||
cudaGPUs = append(cudaGPUs, gpuInfo)
|
cudaGPUs = append(cudaGPUs, gpuInfo)
|
||||||
}
|
}
|
||||||
|
// Second pass on NVIDIA GPUs to set lowest common denominator variant and DependencyPaths
|
||||||
|
variant := cudaVariant(cudaGPUs)
|
||||||
|
var variantPath string
|
||||||
|
// Start with our bundled libraries
|
||||||
|
if variant != "" {
|
||||||
|
variantPath = filepath.Join(LibOllamaPath, "cuda_"+variant)
|
||||||
|
if _, err := os.Stat(variantPath); err != nil {
|
||||||
|
variantPath = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range cudaGPUs {
|
||||||
|
cudaGPUs[i].Variant = variant
|
||||||
|
if variantPath != "" {
|
||||||
|
// Put the variant directory first in the search path to avoid runtime linking to the wrong library
|
||||||
|
cudaGPUs[i].DependencyPath = append([]string{variantPath}, cudaGPUs[i].DependencyPath...)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Intel
|
// Intel
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user