diff --git a/llm/server.go b/llm/server.go index b9ffdc6c..b8d3cdd3 100644 --- a/llm/server.go +++ b/llm/server.go @@ -678,8 +678,12 @@ func (s *ollamaServer) Load(ctx context.Context, gpus discover.GpuInfoList, requ if !(len(gpus) == 1 && gpus[0].Library == "cpu") { for _, gpu := range gpus { + available := gpu.FreeMemory - envconfig.GpuOverhead() - gpu.MinimumMemory + if gpu.FreeMemory < envconfig.GpuOverhead()+gpu.MinimumMemory { + available = 0 + } slog.Info("gpu memory", "id", gpu.ID, - "available", format.HumanBytes2(gpu.FreeMemory-envconfig.GpuOverhead()-gpu.MinimumMemory), + "available", format.HumanBytes2(available), "free", format.HumanBytes2(gpu.FreeMemory), "minimum", format.HumanBytes2(gpu.MinimumMemory), "overhead", format.HumanBytes2(envconfig.GpuOverhead()))