mirror of
https://github.com/zebrajr/ollama.git
synced 2025-12-06 12:19:56 +01:00
ggml: Disable unused pipeline parallelism
We're not currently using it, even in cases where we could. Disabling it improves generation performance by 10-30% with multiple GPUs.
This commit is contained in:
parent
f8a6e88819
commit
9a43994c45
|
|
@@ -418,7 +418,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
|
|||
(*C.ggml_backend_buffer_type_t)(unsafe.Pointer(&schedBufts[0])),
|
||||
C.int(len(schedBackends)),
|
||||
C.size_t(maxGraphNodes),
|
||||
-C._Bool(len(gpus) > 1 && slices.Contains(gpus, output.d)),
|
||||
+C._Bool(false),
|
||||
C._Bool(false),
|
||||
),
|
||||
schedBackends: schedBackends,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user