diff --git a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch new file mode 100644 index 00000000..a2539034 --- /dev/null +++ b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch @@ -0,0 +1,102 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Jesse Gross +Date: Thu, 24 Apr 2025 14:48:51 -0700 +Subject: [PATCH] ggml: Export GPU UUIDs + +This enables matching up devices and information reported by the backend +with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml). +--- + ggml/include/ggml-backend.h | 1 + + ggml/src/ggml-cuda/ggml-cuda.cu | 33 ++++++++++++++++++++++++++++++++ + ggml/src/ggml-metal/ggml-metal.m | 1 + + 3 files changed, 35 insertions(+) + +diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h +index 74e46716..a880df33 100644 +--- a/ggml/include/ggml-backend.h ++++ b/ggml/include/ggml-backend.h +@@ -152,6 +152,7 @@ extern "C" { + struct ggml_backend_dev_props { + const char * name; + const char * description; ++ const char * uuid; + size_t memory_free; + size_t memory_total; + enum ggml_backend_dev_type type; +diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu +index cb0d8528..4c829153 100644 +--- a/ggml/src/ggml-cuda/ggml-cuda.cu ++++ b/ggml/src/ggml-cuda/ggml-cuda.cu +@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context { + int device; + std::string name; + std::string description; ++ std::string uuid; + }; + + static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) { +@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t + return ctx->description.c_str(); + } + ++static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) { ++ ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; ++ return ctx->uuid.c_str(); ++} ++ + static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { + ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; + ggml_cuda_set_device(ctx->device); +@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend + static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { + props->name = ggml_backend_cuda_device_get_name(dev); + props->description = ggml_backend_cuda_device_get_description(dev); ++ props->uuid = ggml_backend_cuda_device_get_uuid(dev); + props->type = ggml_backend_cuda_device_get_type(dev); + ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total); + +@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { + CUDA_CHECK(cudaGetDeviceProperties(&prop, i)); + dev_ctx->description = prop.name; + ++ #if !defined(GGML_USE_HIP) ++ char uuid[64]; ++ snprintf(uuid, sizeof(uuid), ++ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", ++ (unsigned char)prop.uuid.bytes[0], ++ (unsigned char)prop.uuid.bytes[1], ++ (unsigned char)prop.uuid.bytes[2], ++ (unsigned char)prop.uuid.bytes[3], ++ (unsigned char)prop.uuid.bytes[4], ++ (unsigned char)prop.uuid.bytes[5], ++ (unsigned char)prop.uuid.bytes[6], ++ (unsigned char)prop.uuid.bytes[7], ++ (unsigned char)prop.uuid.bytes[8], ++ (unsigned char)prop.uuid.bytes[9], ++ (unsigned char)prop.uuid.bytes[10], ++ (unsigned char)prop.uuid.bytes[11], ++ (unsigned char)prop.uuid.bytes[12], ++ (unsigned char)prop.uuid.bytes[13], ++ (unsigned char)prop.uuid.bytes[14], ++ (unsigned char)prop.uuid.bytes[15] ++ ); ++ dev_ctx->uuid = uuid; ++ #else ++ dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16); ++ #endif ++ + ggml_backend_dev_t dev = new ggml_backend_device { + /* .iface = */ ggml_backend_cuda_device_interface, + /* .reg = */ ®, +diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m +index 1b56f858..ee4f2dcb 100644 +--- a/ggml/src/ggml-metal/ggml-metal.m ++++ b/ggml/src/ggml-metal/ggml-metal.m +@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen + static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { + props->name = ggml_backend_metal_device_get_name(dev); + props->description = ggml_backend_metal_device_get_description(dev); ++ props->uuid = "0"; + props->type = ggml_backend_metal_device_get_type(dev); + ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total); + props->caps = (struct ggml_backend_dev_caps) { diff --git a/ml/backend.go b/ml/backend.go index 65f16948..2df6c892 100644 --- a/ml/backend.go +++ b/ml/backend.go @@ -124,6 +124,10 @@ type DeviceMemory struct { // may not be persistent across instances of the runner. Name string + // UUID is a unique persistent identifier for the device for matching + // with system management libraries + UUID string + // Weights is the per-layer memory needed for the model weights. Weights []Memory @@ -152,6 +156,10 @@ func (m DeviceMemory) LogValue() slog.Value { attrs = append(attrs, slog.Any("Graph", m.Graph)) } + if len(attrs) > 0 && m.UUID != "" { + attrs = append([]slog.Attr{slog.String("UUID", m.UUID)}, attrs...) + } + return slog.GroupValue(attrs...) } diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index 76172ae1..5a9fe67e 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -136,6 +136,9 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) { } requiredMemory.CPU.Name = C.GoString(C.ggml_backend_dev_name(cpuDeviceBufferType.d)) + var props C.struct_ggml_backend_dev_props + C.ggml_backend_dev_get_props(cpuDeviceBufferType.d, &props) + requiredMemory.CPU.UUID = C.GoString(props.uuid) requiredMemory.CPU.Weights = make([]ml.Memory, blocks+1) requiredMemory.CPU.Cache = make([]ml.Memory, blocks+1) @@ -150,6 +153,9 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) { }) btDeviceMemory[bt] = &requiredMemory.GPUs[i] requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d)) + var props C.struct_ggml_backend_dev_props + C.ggml_backend_dev_get_props(d, &props) + requiredMemory.GPUs[i].UUID = C.GoString(props.uuid) requiredMemory.GPUs[i].Weights = make([]ml.Memory, blocks+1) requiredMemory.GPUs[i].Cache = make([]ml.Memory, blocks+1) } diff --git a/ml/backend/ggml/ggml/include/ggml-backend.h b/ml/backend/ggml/ggml/include/ggml-backend.h index 74e46716..a880df33 100644 --- a/ml/backend/ggml/ggml/include/ggml-backend.h +++ b/ml/backend/ggml/ggml/include/ggml-backend.h @@ -152,6 +152,7 @@ extern "C" { struct ggml_backend_dev_props { const char * name; const char * description; + const char * uuid; size_t memory_free; size_t memory_total; enum ggml_backend_dev_type type; diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu index cb0d8528..4c829153 100644 --- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context { int device; std::string name; std::string description; + std::string uuid; }; static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) { @@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t return ctx->description.c_str(); } +static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) { + ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; + return ctx->uuid.c_str(); +} + static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; ggml_cuda_set_device(ctx->device); @@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { props->name = ggml_backend_cuda_device_get_name(dev); props->description = ggml_backend_cuda_device_get_description(dev); + props->uuid = ggml_backend_cuda_device_get_uuid(dev); props->type = ggml_backend_cuda_device_get_type(dev); ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total); @@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { CUDA_CHECK(cudaGetDeviceProperties(&prop, i)); dev_ctx->description = prop.name; + #if !defined(GGML_USE_HIP) + char uuid[64]; + snprintf(uuid, sizeof(uuid), + "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + (unsigned char)prop.uuid.bytes[0], + (unsigned char)prop.uuid.bytes[1], + (unsigned char)prop.uuid.bytes[2], + (unsigned char)prop.uuid.bytes[3], + (unsigned char)prop.uuid.bytes[4], + (unsigned char)prop.uuid.bytes[5], + (unsigned char)prop.uuid.bytes[6], + (unsigned char)prop.uuid.bytes[7], + (unsigned char)prop.uuid.bytes[8], + (unsigned char)prop.uuid.bytes[9], + (unsigned char)prop.uuid.bytes[10], + (unsigned char)prop.uuid.bytes[11], + (unsigned char)prop.uuid.bytes[12], + (unsigned char)prop.uuid.bytes[13], + (unsigned char)prop.uuid.bytes[14], + (unsigned char)prop.uuid.bytes[15] + ); + dev_ctx->uuid = uuid; + #else + dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16); + #endif + ggml_backend_dev_t dev = new ggml_backend_device { /* .iface = */ ggml_backend_cuda_device_interface, /* .reg = */ ®, diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m index 1b56f858..ee4f2dcb 100644 --- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m @@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { props->name = ggml_backend_metal_device_get_name(dev); props->description = ggml_backend_metal_device_get_description(dev); + props->uuid = "0"; props->type = ggml_backend_metal_device_get_type(dev); ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total); props->caps = (struct ggml_backend_dev_caps) {