mirror of
https://github.com/zebrajr/ollama.git
synced 2025-12-06 12:19:56 +01:00
Always filter devices (#12108)
* Always filter devices
  Avoid crashing on unsupported AMD iGPUs
* Remove cuda device filtering
  This interferes with mixed setups
This commit is contained in:
parent
4383a3ab7a
commit
ead4a9a1d0
|
|
@ -277,6 +277,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
||||||
FreeMemory: (totalMemory - usedMemory),
|
FreeMemory: (totalMemory - usedMemory),
|
||||||
},
|
},
|
||||||
ID: ID,
|
ID: ID,
|
||||||
|
filterID: gpuOrdinalID,
|
||||||
Name: name,
|
Name: name,
|
||||||
Compute: fmt.Sprintf("gfx%d%x%x", major, minor, patch),
|
Compute: fmt.Sprintf("gfx%d%x%x", major, minor, patch),
|
||||||
MinimumMemory: rocmMinimumMemory,
|
MinimumMemory: rocmMinimumMemory,
|
||||||
|
|
@ -394,7 +395,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
||||||
|
|
||||||
// Check for env var workarounds
|
// Check for env var workarounds
|
||||||
if name == "1002:687f" { // Vega RX 56
|
if name == "1002:687f" { // Vega RX 56
|
||||||
gpuInfo.EnvWorkarounds = append(gpuInfo.EnvWorkarounds, [2]string{"HSA_ENABLE_SDMA", "0"})
|
gpuInfo.EnvWorkarounds = append(gpuInfo.EnvWorkarounds, "HSA_ENABLE_SDMA=0")
|
||||||
}
|
}
|
||||||
|
|
||||||
// The GPU has passed all the verification steps and is supported
|
// The GPU has passed all the verification steps and is supported
|
||||||
|
|
@ -523,19 +524,26 @@ func verifyKFDDriverAccess() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
|
func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
|
||||||
ids := []string{}
|
ids := []string{}
|
||||||
for _, info := range gpuInfo {
|
for _, info := range gpuInfo {
|
||||||
if info.Library != "rocm" {
|
if info.Library != "rocm" {
|
||||||
// TODO shouldn't happen if things are wired correctly...
|
|
||||||
slog.Debug("rocmGetVisibleDevicesEnv skipping over non-rocm device", "library", info.Library)
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
ids = append(ids, info.ID)
|
// If the device requires a numeric ID, for filtering purposes, we use the unfiltered ID number
|
||||||
|
if _, err := strconv.Atoi(info.ID); err == nil {
|
||||||
|
ids = append(ids, fmt.Sprintf("%d", info.filterID))
|
||||||
|
} else {
|
||||||
|
ids = append(ids, info.ID)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if len(ids) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
// There are 3 potential env vars to use to select GPUs.
|
// There are 3 potential env vars to use to select GPUs.
|
||||||
// ROCR_VISIBLE_DEVICES supports UUID or numeric so is our preferred on linux
|
// ROCR_VISIBLE_DEVICES supports UUID or numeric so is our preferred on linux
|
||||||
// GPU_DEVICE_ORDINAL supports numeric IDs only
|
// GPU_DEVICE_ORDINAL supports numeric IDs only
|
||||||
// HIP_VISIBLE_DEVICES supports numeric IDs only
|
// HIP_VISIBLE_DEVICES supports numeric IDs only
|
||||||
return "ROCR_VISIBLE_DEVICES", strings.Join(ids, ",")
|
return "ROCR_VISIBLE_DEVICES=" + strings.Join(ids, ",")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
||||||
UnreliableFreeMemory: true,
|
UnreliableFreeMemory: true,
|
||||||
|
|
||||||
ID: strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices
|
ID: strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices
|
||||||
|
filterID: i,
|
||||||
DependencyPath: []string{libDir},
|
DependencyPath: []string{libDir},
|
||||||
MinimumMemory: rocmMinimumMemory,
|
MinimumMemory: rocmMinimumMemory,
|
||||||
Name: name,
|
Name: name,
|
||||||
|
|
@ -200,19 +201,26 @@ func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
|
func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
|
||||||
ids := []string{}
|
ids := []string{}
|
||||||
for _, info := range gpuInfo {
|
for _, info := range gpuInfo {
|
||||||
if info.Library != "rocm" {
|
if info.Library != "rocm" {
|
||||||
// TODO shouldn't happen if things are wired correctly...
|
|
||||||
slog.Debug("rocmGetVisibleDevicesEnv skipping over non-rocm device", "library", info.Library)
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
ids = append(ids, info.ID)
|
// If the device requires a numeric ID, for filtering purposes, we use the unfiltered ID number
|
||||||
|
if _, err := strconv.Atoi(info.ID); err == nil {
|
||||||
|
ids = append(ids, fmt.Sprintf("%d", info.filterID))
|
||||||
|
} else {
|
||||||
|
ids = append(ids, info.ID)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if len(ids) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
// There are 3 potential env vars to use to select GPUs.
|
// There are 3 potential env vars to use to select GPUs.
|
||||||
// ROCR_VISIBLE_DEVICES supports UUID or numeric but does not work on Windows
|
// ROCR_VISIBLE_DEVICES supports UUID or numeric but does not work on Windows
|
||||||
// HIP_VISIBLE_DEVICES supports numeric IDs only
|
// HIP_VISIBLE_DEVICES supports numeric IDs only
|
||||||
// GPU_DEVICE_ORDINAL supports numeric IDs only
|
// GPU_DEVICE_ORDINAL supports numeric IDs only
|
||||||
return "HIP_VISIBLE_DEVICES", strings.Join(ids, ",")
|
return "HIP_VISIBLE_DEVICES=" + strings.Join(ids, ",")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,19 +16,6 @@ import (
|
||||||
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
||||||
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||||
|
|
||||||
func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
|
|
||||||
ids := []string{}
|
|
||||||
for _, info := range gpuInfo {
|
|
||||||
if info.Library != "cuda" {
|
|
||||||
// TODO shouldn't happen if things are wired correctly...
|
|
||||||
slog.Debug("cudaGetVisibleDevicesEnv skipping over non-cuda device", "library", info.Library)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
ids = append(ids, info.ID)
|
|
||||||
}
|
|
||||||
return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
|
|
||||||
}
|
|
||||||
|
|
||||||
func cudaVariant(gpuInfo CudaGPUInfo) string {
|
func cudaVariant(gpuInfo CudaGPUInfo) string {
|
||||||
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
||||||
if CudaTegra != "" {
|
if CudaTegra != "" {
|
||||||
|
|
|
||||||
|
|
@ -371,6 +371,15 @@ func GetGPUInfo() GpuInfoList {
|
||||||
}
|
}
|
||||||
|
|
||||||
rocmGPUs, err = AMDGetGPUInfo()
|
rocmGPUs, err = AMDGetGPUInfo()
|
||||||
|
|
||||||
|
// The ID field is used in the context of the filtered set of GPUs
|
||||||
|
// so we have to replace any of these numeric IDs with their
|
||||||
|
// placement in this set of GPUs
|
||||||
|
for i := range rocmGPUs {
|
||||||
|
if _, err := strconv.Atoi(rocmGPUs[i].ID); err == nil {
|
||||||
|
rocmGPUs[i].ID = strconv.Itoa(i)
|
||||||
|
}
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
bootstrapErrors = append(bootstrapErrors, err)
|
bootstrapErrors = append(bootstrapErrors, err)
|
||||||
}
|
}
|
||||||
|
|
@ -680,23 +689,16 @@ func getVerboseState() C.uint16_t {
|
||||||
|
|
||||||
// Given the list of GPUs this instantiation is targeted for,
|
// Given the list of GPUs this instantiation is targeted for,
|
||||||
// figure out the visible devices environment variable
|
// figure out the visible devices environment variable
|
||||||
//
|
func (l GpuInfoList) GetVisibleDevicesEnv() []string {
|
||||||
// If different libraries are detected, the first one is what we use
|
|
||||||
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
|
|
||||||
if len(l) == 0 {
|
if len(l) == 0 {
|
||||||
return "", ""
|
return nil
|
||||||
}
|
}
|
||||||
switch l[0].Library {
|
vd := []string{}
|
||||||
case "cuda":
|
// Only filter the AMD GPUs at this level, let all NVIDIA devices through
|
||||||
return cudaGetVisibleDevicesEnv(l)
|
if tmp := rocmGetVisibleDevicesEnv(l); tmp != "" {
|
||||||
case "rocm":
|
vd = append(vd, tmp)
|
||||||
return rocmGetVisibleDevicesEnv(l)
|
|
||||||
case "oneapi":
|
|
||||||
return oneapiGetVisibleDevicesEnv(l)
|
|
||||||
default:
|
|
||||||
slog.Debug("no filter required for library " + l[0].Library)
|
|
||||||
return "", ""
|
|
||||||
}
|
}
|
||||||
|
return vd
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetSystemInfo() SystemInfo {
|
func GetSystemInfo() SystemInfo {
|
||||||
|
|
|
||||||
|
|
@ -62,9 +62,9 @@ func GetCPUMem() (memInfo, error) {
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
|
func (l GpuInfoList) GetVisibleDevicesEnv() []string {
|
||||||
// No-op on darwin
|
// No-op on darwin
|
||||||
return "", ""
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetSystemInfo() SystemInfo {
|
func GetSystemInfo() SystemInfo {
|
||||||
|
|
|
||||||
|
|
@ -1,21 +0,0 @@
|
||||||
//go:build linux || windows
|
|
||||||
|
|
||||||
package discover
|
|
||||||
|
|
||||||
import (
|
|
||||||
"log/slog"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
func oneapiGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
|
|
||||||
ids := []string{}
|
|
||||||
for _, info := range gpuInfo {
|
|
||||||
if info.Library != "oneapi" {
|
|
||||||
// TODO shouldn't happen if things are wired correctly...
|
|
||||||
slog.Debug("oneapiGetVisibleDevicesEnv skipping over non-sycl device", "library", info.Library)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
ids = append(ids, info.ID)
|
|
||||||
}
|
|
||||||
return "ONEAPI_DEVICE_SELECTOR", "level_zero:" + strings.Join(ids, ",")
|
|
||||||
}
|
|
||||||
|
|
@ -27,8 +27,8 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
|
||||||
// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
|
// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
|
||||||
DependencyPath []string `json:"lib_path,omitempty"`
|
DependencyPath []string `json:"lib_path,omitempty"`
|
||||||
|
|
||||||
// Extra environment variables specific to the GPU as list of [key,value]
|
// Extra environment variables specific to the GPU as list of [key=value]
|
||||||
EnvWorkarounds [][2]string `json:"envs,omitempty"`
|
EnvWorkarounds []string `json:"envs,omitempty"`
|
||||||
|
|
||||||
// Set to true if we can NOT reliably discover FreeMemory. A value of true indicates
|
// Set to true if we can NOT reliably discover FreeMemory. A value of true indicates
|
||||||
// the FreeMemory is best effort, and may over or under report actual memory usage
|
// the FreeMemory is best effort, and may over or under report actual memory usage
|
||||||
|
|
@ -36,9 +36,10 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
|
||||||
UnreliableFreeMemory bool
|
UnreliableFreeMemory bool
|
||||||
|
|
||||||
// GPU information
|
// GPU information
|
||||||
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
|
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
|
||||||
Name string `json:"name"` // user friendly name if available
|
filterID int //nolint:unused,nolintlint // AMD Workaround: The numeric ID of the device used to filter out other devices
|
||||||
Compute string `json:"compute"` // Compute Capability or gfx
|
Name string `json:"name"` // user friendly name if available
|
||||||
|
Compute string `json:"compute"` // Compute Capability or gfx
|
||||||
|
|
||||||
// Driver Information - TODO no need to put this on each GPU
|
// Driver Information - TODO no need to put this on each GPU
|
||||||
DriverMajor int `json:"driver_major,omitempty"`
|
DriverMajor int `json:"driver_major,omitempty"`
|
||||||
|
|
|
||||||
|
|
@ -360,23 +360,28 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
|
||||||
|
|
||||||
s.cmd.Env = append(s.cmd.Env, "OLLAMA_LIBRARY_PATH="+strings.Join(ggmlPaths, string(filepath.ListSeparator)))
|
s.cmd.Env = append(s.cmd.Env, "OLLAMA_LIBRARY_PATH="+strings.Join(ggmlPaths, string(filepath.ListSeparator)))
|
||||||
|
|
||||||
envWorkarounds := [][2]string{}
|
envWorkarounds := []string{}
|
||||||
for _, gpu := range gpus {
|
for _, gpu := range gpus {
|
||||||
envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...)
|
envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...)
|
||||||
}
|
}
|
||||||
|
// Always filter down the set of GPUs in case there are any unsupported devices that might crash
|
||||||
|
envWorkarounds = append(envWorkarounds, gpus.GetVisibleDevicesEnv()...)
|
||||||
pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
|
pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
|
||||||
|
|
||||||
// Update or add the path variable with our adjusted version
|
// Update or add the path variable with our adjusted version
|
||||||
pathNeeded := true
|
pathNeeded := true
|
||||||
|
envWorkaroundDone := make([]bool, len(envWorkarounds))
|
||||||
for i := range s.cmd.Env {
|
for i := range s.cmd.Env {
|
||||||
cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
|
cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
|
||||||
if strings.EqualFold(cmp[0], pathEnv) {
|
if strings.EqualFold(cmp[0], pathEnv) {
|
||||||
s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
|
s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
|
||||||
pathNeeded = false
|
pathNeeded = false
|
||||||
} else if len(envWorkarounds) != 0 {
|
} else if len(envWorkarounds) != 0 {
|
||||||
for _, kv := range envWorkarounds {
|
for j, kv := range envWorkarounds {
|
||||||
if strings.EqualFold(cmp[0], kv[0]) {
|
tmp := strings.SplitN(kv, "=", 2)
|
||||||
s.cmd.Env[i] = kv[0] + "=" + kv[1]
|
if strings.EqualFold(cmp[0], tmp[0]) {
|
||||||
|
s.cmd.Env[i] = kv
|
||||||
|
envWorkaroundDone[j] = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -384,6 +389,11 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
|
||||||
if pathNeeded {
|
if pathNeeded {
|
||||||
s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
|
s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
|
||||||
}
|
}
|
||||||
|
for i, done := range envWorkaroundDone {
|
||||||
|
if !done {
|
||||||
|
s.cmd.Env = append(s.cmd.Env, envWorkarounds[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
slog.Info("starting runner", "cmd", s.cmd)
|
slog.Info("starting runner", "cmd", s.cmd)
|
||||||
slog.Debug("subprocess", "", filteredEnv(s.cmd.Env))
|
slog.Debug("subprocess", "", filteredEnv(s.cmd.Env))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user