diff --git a/discover/amd_linux.go b/discover/amd_linux.go index ebffbdf6..0f2aa067 100644 --- a/discover/amd_linux.go +++ b/discover/amd_linux.go @@ -277,6 +277,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) { FreeMemory: (totalMemory - usedMemory), }, ID: ID, + filterID: gpuOrdinalID, Name: name, Compute: fmt.Sprintf("gfx%d%x%x", major, minor, patch), MinimumMemory: rocmMinimumMemory, @@ -394,7 +395,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) { // Check for env var workarounds if name == "1002:687f" { // Vega RX 56 - gpuInfo.EnvWorkarounds = append(gpuInfo.EnvWorkarounds, [2]string{"HSA_ENABLE_SDMA", "0"}) + gpuInfo.EnvWorkarounds = append(gpuInfo.EnvWorkarounds, "HSA_ENABLE_SDMA=0") } // The GPU has passed all the verification steps and is supported @@ -523,19 +524,26 @@ func verifyKFDDriverAccess() error { return nil } -func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { +func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string { ids := []string{} for _, info := range gpuInfo { if info.Library != "rocm" { - // TODO shouldn't happen if things are wired correctly... - slog.Debug("rocmGetVisibleDevicesEnv skipping over non-rocm device", "library", info.Library) continue } - ids = append(ids, info.ID) + // If the devices requires a numeric ID, for filtering purposes, we use the unfiltered ID number + if _, err := strconv.Atoi(info.ID); err == nil { + ids = append(ids, fmt.Sprintf("%d", info.filterID)) + } else { + ids = append(ids, info.ID) + } } + if len(ids) == 0 { + return "" + } + // There are 3 potential env vars to use to select GPUs. // ROCR_VISIBLE_DEVICES supports UUID or numeric so is our preferred on linux // GPU_DEVICE_ORDINAL supports numeric IDs only // HIP_VISIBLE_DEVICES supports numeric IDs only - return "ROCR_VISIBLE_DEVICES", strings.Join(ids, ",") + return "ROCR_VISIBLE_DEVICES=" + strings.Join(ids, ",") } diff --git a/discover/amd_windows.go b/discover/amd_windows.go index 0659d12f..08608ad1 100644 --- a/discover/amd_windows.go +++ b/discover/amd_windows.go @@ -111,6 +111,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) { UnreliableFreeMemory: true, ID: strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices + filterID: i, DependencyPath: []string{libDir}, MinimumMemory: rocmMinimumMemory, Name: name, @@ -200,19 +201,26 @@ func (gpus RocmGPUInfoList) RefreshFreeMemory() error { return nil } -func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { +func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string { ids := []string{} for _, info := range gpuInfo { if info.Library != "rocm" { - // TODO shouldn't happen if things are wired correctly... - slog.Debug("rocmGetVisibleDevicesEnv skipping over non-rocm device", "library", info.Library) continue } - ids = append(ids, info.ID) + // If the devices requires a numeric ID, for filtering purposes, we use the unfiltered ID number + if _, err := strconv.Atoi(info.ID); err == nil { + ids = append(ids, fmt.Sprintf("%d", info.filterID)) + } else { + ids = append(ids, info.ID) + } } + if len(ids) == 0 { + return "" + } + // There are 3 potential env vars to use to select GPUs. // ROCR_VISIBLE_DEVICES supports UUID or numeric but does not work on Windows // HIP_VISIBLE_DEVICES supports numeric IDs only // GPU_DEVICE_ORDINAL supports numeric IDs only - return "HIP_VISIBLE_DEVICES", strings.Join(ids, ",") + return "HIP_VISIBLE_DEVICES=" + strings.Join(ids, ",") } diff --git a/discover/cuda_common.go b/discover/cuda_common.go index 3c7cb669..b539f6b3 100644 --- a/discover/cuda_common.go +++ b/discover/cuda_common.go @@ -16,19 +16,6 @@ import ( // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices. var CudaTegra string = os.Getenv("JETSON_JETPACK") -func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { - ids := []string{} - for _, info := range gpuInfo { - if info.Library != "cuda" { - // TODO shouldn't happen if things are wired correctly... - slog.Debug("cudaGetVisibleDevicesEnv skipping over non-cuda device", "library", info.Library) - continue - } - ids = append(ids, info.ID) - } - return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",") -} - func cudaVariant(gpuInfo CudaGPUInfo) string { if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" { if CudaTegra != "" { diff --git a/discover/gpu.go b/discover/gpu.go index f6e3c9cb..b0962611 100644 --- a/discover/gpu.go +++ b/discover/gpu.go @@ -371,6 +371,15 @@ func GetGPUInfo() GpuInfoList { } rocmGPUs, err = AMDGetGPUInfo() + + // The ID field is used in context of the filtered set of GPUS + // so we have to replace any of these numeric IDs with their + // placement in this set of GPUs + for i := range rocmGPUs { + if _, err := strconv.Atoi(rocmGPUs[i].ID); err == nil { + rocmGPUs[i].ID = strconv.Itoa(i) + } + } if err != nil { bootstrapErrors = append(bootstrapErrors, err) } @@ -680,23 +689,16 @@ func getVerboseState() C.uint16_t { // Given the list of GPUs this instantiation is targeted for, // figure out the visible devices environment variable -// -// If different libraries are detected, the first one is what we use -func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) { +func (l GpuInfoList) GetVisibleDevicesEnv() []string { if len(l) == 0 { - return "", "" + return nil } - switch l[0].Library { - case "cuda": - return cudaGetVisibleDevicesEnv(l) - case "rocm": - return rocmGetVisibleDevicesEnv(l) - case "oneapi": - return oneapiGetVisibleDevicesEnv(l) - default: - slog.Debug("no filter required for library " + l[0].Library) - return "", "" + vd := []string{} + // Only filter the AMD GPUs at this level, let all NVIDIA devices through + if tmp := rocmGetVisibleDevicesEnv(l); tmp != "" { + vd = append(vd, tmp) } + return vd } func GetSystemInfo() SystemInfo { diff --git a/discover/gpu_darwin.go b/discover/gpu_darwin.go index dd5bf6e2..29b44ff5 100644 --- a/discover/gpu_darwin.go +++ b/discover/gpu_darwin.go @@ -62,9 +62,9 @@ func GetCPUMem() (memInfo, error) { }, nil } -func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) { +func (l GpuInfoList) GetVisibleDevicesEnv() []string { // No-op on darwin - return "", "" + return nil } func GetSystemInfo() SystemInfo { diff --git a/discover/gpu_oneapi.go b/discover/gpu_oneapi.go deleted file mode 100644 index 77941f5b..00000000 --- a/discover/gpu_oneapi.go +++ /dev/null @@ -1,21 +0,0 @@ -//go:build linux || windows - -package discover - -import ( - "log/slog" - "strings" -) - -func oneapiGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { - ids := []string{} - for _, info := range gpuInfo { - if info.Library != "oneapi" { - // TODO shouldn't happen if things are wired correctly... - slog.Debug("oneapiGetVisibleDevicesEnv skipping over non-sycl device", "library", info.Library) - continue - } - ids = append(ids, info.ID) - } - return "ONEAPI_DEVICE_SELECTOR", "level_zero:" + strings.Join(ids, ",") -} diff --git a/discover/types.go b/discover/types.go index 13a030fd..1027aaac 100644 --- a/discover/types.go +++ b/discover/types.go @@ -27,8 +27,8 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"? // Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly DependencyPath []string `json:"lib_path,omitempty"` - // Extra environment variables specific to the GPU as list of [key,value] - EnvWorkarounds [][2]string `json:"envs,omitempty"` + // Extra environment variables specific to the GPU as list of [key=value] + EnvWorkarounds []string `json:"envs,omitempty"` // Set to true if we can NOT reliably discover FreeMemory. A value of true indicates // the FreeMemory is best effort, and may over or under report actual memory usage @@ -36,9 +36,10 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"? UnreliableFreeMemory bool // GPU information - ID string `json:"gpu_id"` // string to use for selection of this specific GPU - Name string `json:"name"` // user friendly name if available - Compute string `json:"compute"` // Compute Capability or gfx + ID string `json:"gpu_id"` // string to use for selection of this specific GPU + filterID int //nolint:unused,nolintlint // AMD Workaround: The numeric ID of the device used to filter out other devices + Name string `json:"name"` // user friendly name if available + Compute string `json:"compute"` // Compute Capability or gfx // Driver Information - TODO no need to put this on each GPU DriverMajor int `json:"driver_major,omitempty"` diff --git a/llm/server.go b/llm/server.go index 30cf5c36..b9ffdc6c 100644 --- a/llm/server.go +++ b/llm/server.go @@ -360,23 +360,28 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a s.cmd.Env = append(s.cmd.Env, "OLLAMA_LIBRARY_PATH="+strings.Join(ggmlPaths, string(filepath.ListSeparator))) - envWorkarounds := [][2]string{} + envWorkarounds := []string{} for _, gpu := range gpus { envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...) } + // Always filter down the set of GPUs in case there are any unsupported devices that might crash + envWorkarounds = append(envWorkarounds, gpus.GetVisibleDevicesEnv()...) pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator)) // Update or add the path variable with our adjusted version pathNeeded := true + envWorkaroundDone := make([]bool, len(envWorkarounds)) for i := range s.cmd.Env { cmp := strings.SplitN(s.cmd.Env[i], "=", 2) if strings.EqualFold(cmp[0], pathEnv) { s.cmd.Env[i] = pathEnv + "=" + pathEnvVal pathNeeded = false } else if len(envWorkarounds) != 0 { - for _, kv := range envWorkarounds { - if strings.EqualFold(cmp[0], kv[0]) { - s.cmd.Env[i] = kv[0] + "=" + kv[1] + for j, kv := range envWorkarounds { + tmp := strings.SplitN(kv, "=", 2) + if strings.EqualFold(cmp[0], tmp[0]) { + s.cmd.Env[i] = kv + envWorkaroundDone[j] = true } } } @@ -384,6 +389,11 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a if pathNeeded { s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal) } + for i, done := range envWorkaroundDone { + if !done { + s.cmd.Env = append(s.cmd.Env, envWorkarounds[i]) + } + } slog.Info("starting runner", "cmd", s.cmd) slog.Debug("subprocess", "", filteredEnv(s.cmd.Env))