mirror of
https://github.com/zebrajr/ollama.git
synced 2025-12-06 12:19:56 +01:00
Fix vulkan PCI ID and ID handling (#12775)
* Fix vulkan PCI ID and ID handling Intel GPUs may not report PCI IDs, which was leading to incorrect overlap detection. Switch to using the existing PCI IDs; however, AMD GPUs claim not to report PCI IDs but actually do, so try anyway, as this is required for ADLX to find the GPUs on Windows. Numeric IDs lead to scheduling problems, so this also switches Vulkan to use UUID-based IDs. The GPU discovery patches have been squashed into a single patch to simplify future rebases. * review comments
This commit is contained in:
parent
29f63f37c8
commit
14977a9350
|
|
@ -117,7 +117,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||||
|
|
||||||
// In the second pass, we more deeply initialize the GPUs to weed out devices that
|
// In the second pass, we more deeply initialize the GPUs to weed out devices that
|
||||||
// aren't supported by a given library. We run this phase in parallel to speed up discovery.
|
// aren't supported by a given library. We run this phase in parallel to speed up discovery.
|
||||||
slog.Debug("filtering out unsupported or overlapping GPU library combinations", "count", len(devices))
|
slog.Debug("evluating which if any devices to filter out", "initial_count", len(devices))
|
||||||
ctx2ndPass, cancel := context.WithTimeout(ctx, 30*time.Second)
|
ctx2ndPass, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
@ -129,7 +129,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||||
if devices[i].Library == "Metal" {
|
if devices[i].Library == "Metal" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
slog.Debug("verifying GPU is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "pci_id", devices[i].PCIID)
|
slog.Debug("verifying GPU is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func(i int) {
|
go func(i int) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
@ -155,6 +155,12 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||||
envVar: id, // Filter to just this one GPU
|
envVar: id, // Filter to just this one GPU
|
||||||
}
|
}
|
||||||
if len(bootstrapDevices(ctx2ndPass, devices[i].LibraryPath, extraEnvs)) == 0 {
|
if len(bootstrapDevices(ctx2ndPass, devices[i].LibraryPath, extraEnvs)) == 0 {
|
||||||
|
slog.Debug("filtering device which didn't fully initialize",
|
||||||
|
"id", devices[i].ID,
|
||||||
|
"libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
|
||||||
|
"pci_id", devices[i].PCIID,
|
||||||
|
"library", devices[i].Library,
|
||||||
|
)
|
||||||
needsDelete[i] = true
|
needsDelete[i] = true
|
||||||
} else {
|
} else {
|
||||||
supportedMu.Lock()
|
supportedMu.Lock()
|
||||||
|
|
@ -170,7 +176,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||||
}(i)
|
}(i)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
logutil.Trace("supported GPU library combinations", "supported", supported)
|
logutil.Trace("supported GPU library combinations before filtering", "supported", supported)
|
||||||
|
|
||||||
filterOutVulkanThatAreSupportedByOtherGPU(needsDelete)
|
filterOutVulkanThatAreSupportedByOtherGPU(needsDelete)
|
||||||
|
|
||||||
|
|
@ -372,12 +378,13 @@ func filterOutVulkanThatAreSupportedByOtherGPU(needsDelete []bool) {
|
||||||
}
|
}
|
||||||
if devices[j].PCIID == devices[i].PCIID && devices[j].Library != "Vulkan" && !needsDelete[j] {
|
if devices[j].PCIID == devices[i].PCIID && devices[j].Library != "Vulkan" && !needsDelete[j] {
|
||||||
needsDelete[i] = true
|
needsDelete[i] = true
|
||||||
slog.Debug("dropping Vulkan duplicate by PCI ID",
|
slog.Debug("filtering device with duplicate PCI ID",
|
||||||
"vulkan_id", devices[i].ID,
|
"id", devices[i].ID,
|
||||||
"vulkan_libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
|
"library", devices[i].Library,
|
||||||
|
"libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
|
||||||
"pci_id", devices[i].PCIID,
|
"pci_id", devices[i].PCIID,
|
||||||
"kept_library", devices[j].Library,
|
|
||||||
"kept_id", devices[j].ID,
|
"kept_id", devices[j].ID,
|
||||||
|
"kept_library", devices[j].Library,
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
@ -422,6 +429,12 @@ func filterOverlapByLibrary(supported map[string]map[string]map[string]int, need
|
||||||
}
|
}
|
||||||
for dev, i := range byLibDirs[libDir] {
|
for dev, i := range byLibDirs[libDir] {
|
||||||
if _, found := byLibDirs[newest][dev]; found {
|
if _, found := byLibDirs[newest][dev]; found {
|
||||||
|
slog.Debug("filtering device with overlapping libraries",
|
||||||
|
"id", dev,
|
||||||
|
"library", libDir,
|
||||||
|
"delete_index", i,
|
||||||
|
"kept_library", newest,
|
||||||
|
)
|
||||||
needsDelete[i] = true
|
needsDelete[i] = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package discover
|
||||||
import (
|
import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
|
|
@ -26,6 +27,7 @@ type CPU struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func LogDetails(devices []ml.DeviceInfo) {
|
func LogDetails(devices []ml.DeviceInfo) {
|
||||||
|
sort.Sort(sort.Reverse(ml.ByFreeMemory(devices))) // Report devices in order of scheduling preference
|
||||||
for _, dev := range devices {
|
for _, dev := range devices {
|
||||||
var libs []string
|
var libs []string
|
||||||
for _, dir := range dev.LibraryPath {
|
for _, dir := range dev.LibraryPath {
|
||||||
|
|
@ -39,6 +41,7 @@ func LogDetails(devices []ml.DeviceInfo) {
|
||||||
}
|
}
|
||||||
slog.Info("inference compute",
|
slog.Info("inference compute",
|
||||||
"id", dev.ID,
|
"id", dev.ID,
|
||||||
|
"filtered_id", dev.FilteredID,
|
||||||
"library", dev.Library,
|
"library", dev.Library,
|
||||||
"compute", dev.Compute(),
|
"compute", dev.Compute(),
|
||||||
"name", dev.Name,
|
"name", dev.Name,
|
||||||
|
|
|
||||||
|
|
@ -5,24 +5,33 @@ Subject: [PATCH] GPU discovery enhancements
|
||||||
|
|
||||||
Expose more information about the devices through backend props, and leverage
|
Expose more information about the devices through backend props, and leverage
|
||||||
management libraries for more accurate VRAM usage reporting if available.
|
management libraries for more accurate VRAM usage reporting if available.
|
||||||
|
|
||||||
|
vulkan: get GPU ID (ollama v0.11.5)
|
||||||
|
|
||||||
|
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
|
||||||
|
|
||||||
|
Vulkan PCI and Memory
|
||||||
|
|
||||||
|
fix vulkan PCI ID and ID handling
|
||||||
---
|
---
|
||||||
ggml/include/ggml-backend.h | 11 +
|
ggml/include/ggml-backend.h | 8 +
|
||||||
ggml/src/CMakeLists.txt | 2 +
|
ggml/src/CMakeLists.txt | 2 +
|
||||||
ggml/src/ggml-cuda/ggml-cuda.cu | 74 +++++
|
ggml/src/ggml-cuda/ggml-cuda.cu | 65 ++++
|
||||||
ggml/src/ggml-cuda/vendors/hip.h | 3 +
|
ggml/src/ggml-cuda/vendors/hip.h | 3 +
|
||||||
ggml/src/ggml-impl.h | 8 +
|
ggml/src/ggml-impl.h | 8 +
|
||||||
ggml/src/ggml-metal/ggml-metal.cpp | 2 +
|
ggml/src/ggml-metal/ggml-metal.cpp | 2 +
|
||||||
ggml/src/mem_hip.cpp | 449 +++++++++++++++++++++++++++++
|
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 212 +++++++++++--
|
||||||
ggml/src/mem_nvml.cpp | 209 ++++++++++++++
|
ggml/src/mem_hip.cpp | 452 +++++++++++++++++++++++++++
|
||||||
8 files changed, 758 insertions(+)
|
ggml/src/mem_nvml.cpp | 209 +++++++++++++
|
||||||
|
9 files changed, 931 insertions(+), 30 deletions(-)
|
||||||
create mode 100644 ggml/src/mem_hip.cpp
|
create mode 100644 ggml/src/mem_hip.cpp
|
||||||
create mode 100644 ggml/src/mem_nvml.cpp
|
create mode 100644 ggml/src/mem_nvml.cpp
|
||||||
|
|
||||||
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
|
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
|
||||||
index ba181d09d..094fc3c82 100644
|
index ba181d09d..809835243 100644
|
||||||
--- a/ggml/include/ggml-backend.h
|
--- a/ggml/include/ggml-backend.h
|
||||||
+++ b/ggml/include/ggml-backend.h
|
+++ b/ggml/include/ggml-backend.h
|
||||||
@@ -169,6 +169,17 @@ extern "C" {
|
@@ -169,6 +169,14 @@ extern "C" {
|
||||||
const char * device_id;
|
const char * device_id;
|
||||||
// device capabilities
|
// device capabilities
|
||||||
struct ggml_backend_dev_caps caps;
|
struct ggml_backend_dev_caps caps;
|
||||||
|
|
@ -31,9 +40,6 @@ index ba181d09d..094fc3c82 100644
|
||||||
+ int compute_major;
|
+ int compute_major;
|
||||||
+ int compute_minor;
|
+ int compute_minor;
|
||||||
+ int integrated;
|
+ int integrated;
|
||||||
+ int pci_bus_id;
|
|
||||||
+ int pci_device_id;
|
|
||||||
+ int pci_domain_id;
|
|
||||||
+ const char *library;
|
+ const char *library;
|
||||||
+ // number with which the devices are accessed (Vulkan)
|
+ // number with which the devices are accessed (Vulkan)
|
||||||
+ const char *numeric_id;
|
+ const char *numeric_id;
|
||||||
|
|
@ -54,7 +60,7 @@ index 0609c6503..aefe43bdd 100644
|
||||||
|
|
||||||
target_include_directories(ggml-base PRIVATE .)
|
target_include_directories(ggml-base PRIVATE .)
|
||||||
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||||
index 87c6c34a4..816597d2f 100644
|
index 87c6c34a4..b075a18be 100644
|
||||||
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||||
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||||
@@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
@@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||||
|
|
@ -86,7 +92,7 @@ index 87c6c34a4..816597d2f 100644
|
||||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
|
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
|
||||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||||
ggml_cuda_parse_uuid(prop, id).c_str());
|
ggml_cuda_parse_uuid(prop, id).c_str());
|
||||||
@@ -3484,6 +3499,14 @@ struct ggml_backend_cuda_device_context {
|
@@ -3484,6 +3499,11 @@ struct ggml_backend_cuda_device_context {
|
||||||
std::string description;
|
std::string description;
|
||||||
std::string pci_bus_id;
|
std::string pci_bus_id;
|
||||||
std::string id;
|
std::string id;
|
||||||
|
|
@ -95,22 +101,19 @@ index 87c6c34a4..816597d2f 100644
|
||||||
+ int driver_major;
|
+ int driver_major;
|
||||||
+ int driver_minor;
|
+ int driver_minor;
|
||||||
+ int integrated;
|
+ int integrated;
|
||||||
+ int pciBusID;
|
|
||||||
+ int pciDeviceID;
|
|
||||||
+ int pciDomainID;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
|
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
|
||||||
@@ -3504,6 +3527,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
|
@@ -3504,6 +3524,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
|
||||||
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
||||||
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
|
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
|
||||||
ggml_cuda_set_device(ctx->device);
|
ggml_cuda_set_device(ctx->device);
|
||||||
+
|
+
|
||||||
+#if defined(GGML_USE_HIP)
|
+#if defined(GGML_USE_HIP)
|
||||||
+ if (ggml_hip_mgmt_init() == 0) {
|
+ if (ggml_hip_mgmt_init() == 0) {
|
||||||
+ int status = ggml_hip_get_device_memory(ctx->pciBusID, ctx->pciDeviceID, free, total);
|
+ int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
|
||||||
+ if (status == 0) {
|
+ if (status == 0) {
|
||||||
+ GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
|
+ GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
|
||||||
+ ggml_hip_mgmt_release();
|
+ ggml_hip_mgmt_release();
|
||||||
+ return;
|
+ return;
|
||||||
+ }
|
+ }
|
||||||
|
|
@ -120,7 +123,7 @@ index 87c6c34a4..816597d2f 100644
|
||||||
+ if (ggml_nvml_init() == 0) {
|
+ if (ggml_nvml_init() == 0) {
|
||||||
+ int status = ggml_nvml_get_device_memory(ctx->id.c_str(), free, total);
|
+ int status = ggml_nvml_get_device_memory(ctx->id.c_str(), free, total);
|
||||||
+ if (status == 0) {
|
+ if (status == 0) {
|
||||||
+ GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
|
+ GGML_LOG_DEBUG("%s device %s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, ctx->id.c_str(), *free, *total);
|
||||||
+ ggml_nvml_release();
|
+ ggml_nvml_release();
|
||||||
+ return;
|
+ return;
|
||||||
+ }
|
+ }
|
||||||
|
|
@ -130,7 +133,7 @@ index 87c6c34a4..816597d2f 100644
|
||||||
CUDA_CHECK(cudaMemGetInfo(free, total));
|
CUDA_CHECK(cudaMemGetInfo(free, total));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3512,6 +3557,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
|
@@ -3512,6 +3554,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
|
||||||
return GGML_BACKEND_DEVICE_TYPE_GPU;
|
return GGML_BACKEND_DEVICE_TYPE_GPU;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -138,7 +141,7 @@ index 87c6c34a4..816597d2f 100644
|
||||||
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
||||||
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
|
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
|
||||||
|
|
||||||
@@ -3525,6 +3571,22 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
|
@@ -3525,6 +3568,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
|
||||||
// If you need the memory data, call ggml_backend_dev_memory() explicitly.
|
// If you need the memory data, call ggml_backend_dev_memory() explicitly.
|
||||||
props->memory_total = props->memory_free = 0;
|
props->memory_total = props->memory_free = 0;
|
||||||
|
|
||||||
|
|
@ -153,15 +156,12 @@ index 87c6c34a4..816597d2f 100644
|
||||||
+ props->driver_major = ctx->driver_major;
|
+ props->driver_major = ctx->driver_major;
|
||||||
+ props->driver_minor = ctx->driver_minor;
|
+ props->driver_minor = ctx->driver_minor;
|
||||||
+ props->integrated = ctx->integrated;
|
+ props->integrated = ctx->integrated;
|
||||||
+ props->pci_bus_id = ctx->pciBusID;
|
|
||||||
+ props->pci_device_id = ctx->pciDeviceID;
|
|
||||||
+ props->pci_domain_id = ctx->pciDomainID;
|
|
||||||
+ props->library = GGML_CUDA_NAME;
|
+ props->library = GGML_CUDA_NAME;
|
||||||
+
|
+
|
||||||
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
|
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
|
||||||
#ifdef GGML_CUDA_NO_PEER_COPY
|
#ifdef GGML_CUDA_NO_PEER_COPY
|
||||||
bool events = false;
|
bool events = false;
|
||||||
@@ -4087,6 +4149,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
@@ -4087,6 +4143,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||||
std::lock_guard<std::mutex> lock(mutex);
|
std::lock_guard<std::mutex> lock(mutex);
|
||||||
if (!initialized) {
|
if (!initialized) {
|
||||||
ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context;
|
ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context;
|
||||||
|
|
@ -169,7 +169,7 @@ index 87c6c34a4..816597d2f 100644
|
||||||
|
|
||||||
for (int i = 0; i < ggml_cuda_info().device_count; i++) {
|
for (int i = 0; i < ggml_cuda_info().device_count; i++) {
|
||||||
ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context;
|
ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context;
|
||||||
@@ -4102,6 +4165,17 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
@@ -4102,6 +4159,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||||
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
|
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
|
||||||
dev_ctx->pci_bus_id = pci_bus_id;
|
dev_ctx->pci_bus_id = pci_bus_id;
|
||||||
|
|
||||||
|
|
@ -181,9 +181,6 @@ index 87c6c34a4..816597d2f 100644
|
||||||
+ dev_ctx->driver_major = driverVersion / 1000;
|
+ dev_ctx->driver_major = driverVersion / 1000;
|
||||||
+ dev_ctx->driver_minor = (driverVersion - (dev_ctx->driver_major * 1000)) / 10;
|
+ dev_ctx->driver_minor = (driverVersion - (dev_ctx->driver_major * 1000)) / 10;
|
||||||
+ dev_ctx->integrated = prop.integrated;
|
+ dev_ctx->integrated = prop.integrated;
|
||||||
+ dev_ctx->pciBusID = prop.pciBusID;
|
|
||||||
+ dev_ctx->pciDeviceID = prop.pciDeviceID;
|
|
||||||
+ dev_ctx->pciDomainID = prop.pciDomainID;
|
|
||||||
ggml_backend_dev_t dev = new ggml_backend_device {
|
ggml_backend_dev_t dev = new ggml_backend_device {
|
||||||
/* .iface = */ ggml_backend_cuda_device_interface,
|
/* .iface = */ ggml_backend_cuda_device_interface,
|
||||||
/* .reg = */ ®,
|
/* .reg = */ ®,
|
||||||
|
|
@ -209,7 +206,7 @@ index 1f06be80e..2f9ef2dc0 100644
|
||||||
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
|
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
|
||||||
#define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
|
#define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
|
||||||
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
|
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
|
||||||
index d0fb3bcca..80597b6ea 100644
|
index d0fb3bcca..b63edd0c1 100644
|
||||||
--- a/ggml/src/ggml-impl.h
|
--- a/ggml/src/ggml-impl.h
|
||||||
+++ b/ggml/src/ggml-impl.h
|
+++ b/ggml/src/ggml-impl.h
|
||||||
@@ -638,6 +638,14 @@ static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx
|
@@ -638,6 +638,14 @@ static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx
|
||||||
|
|
@ -221,7 +218,7 @@ index d0fb3bcca..80597b6ea 100644
|
||||||
+GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
|
+GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
|
||||||
+GGML_API void ggml_nvml_release();
|
+GGML_API void ggml_nvml_release();
|
||||||
+GGML_API int ggml_hip_mgmt_init();
|
+GGML_API int ggml_hip_mgmt_init();
|
||||||
+GGML_API int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total);
|
+GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
|
||||||
+GGML_API void ggml_hip_mgmt_release();
|
+GGML_API void ggml_hip_mgmt_release();
|
||||||
+
|
+
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
@ -247,12 +244,319 @@ index f2ff9f322..f356e4a0a 100644
|
||||||
props->caps = {
|
props->caps = {
|
||||||
/* .async = */ true,
|
/* .async = */ true,
|
||||||
/* .host_buffer = */ false,
|
/* .host_buffer = */ false,
|
||||||
|
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||||
|
index ed83236f4..0bbcecd01 100644
|
||||||
|
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||||
|
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||||
|
@@ -231,6 +231,7 @@ class vk_memory_logger;
|
||||||
|
#endif
|
||||||
|
class vk_perf_logger;
|
||||||
|
static void ggml_vk_destroy_buffer(vk_buffer& buf);
|
||||||
|
+static std::string ggml_vk_get_device_id(int device);
|
||||||
|
|
||||||
|
static constexpr uint32_t mul_mat_vec_max_cols = 8;
|
||||||
|
static constexpr uint32_t p021_max_gqa_ratio = 8;
|
||||||
|
@@ -11585,6 +11586,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_
|
||||||
|
snprintf(description, description_size, "%s", props.deviceName.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
+static std::string ggml_vk_get_device_id(int device) {
|
||||||
|
+ ggml_vk_instance_init();
|
||||||
|
+
|
||||||
|
+ std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
|
||||||
|
+
|
||||||
|
+ vk::PhysicalDeviceProperties2 props;
|
||||||
|
+ vk::PhysicalDeviceIDProperties deviceIDProps;
|
||||||
|
+ props.pNext = &deviceIDProps;
|
||||||
|
+ devices[device].getProperties2(&props);
|
||||||
|
+
|
||||||
|
+ const auto& uuid = deviceIDProps.deviceUUID;
|
||||||
|
+ char id[64];
|
||||||
|
+ snprintf(id, sizeof(id),
|
||||||
|
+ "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||||
|
+ uuid[0], uuid[1], uuid[2], uuid[3],
|
||||||
|
+ uuid[4], uuid[5],
|
||||||
|
+ uuid[6], uuid[7],
|
||||||
|
+ uuid[8], uuid[9],
|
||||||
|
+ uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]
|
||||||
|
+ );
|
||||||
|
+ return std::string(id);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
// backend interface
|
||||||
|
|
||||||
|
#define UNUSED GGML_UNUSED
|
||||||
|
@@ -12391,31 +12415,103 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
|
||||||
|
ggml_vk_get_device_description(dev_idx, description, description_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
-void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
|
||||||
|
+std::string ggml_backend_vk_get_device_id(int device) {
|
||||||
|
GGML_ASSERT(device < (int) vk_instance.device_indices.size());
|
||||||
|
- GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
|
||||||
|
+ int dev_idx = vk_instance.device_indices[device];
|
||||||
|
+ return ggml_vk_get_device_id(dev_idx);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+//////////////////////////
|
||||||
|
+
|
||||||
|
+struct ggml_backend_vk_device_context {
|
||||||
|
+ size_t device;
|
||||||
|
+ std::string name;
|
||||||
|
+ std::string description;
|
||||||
|
+ bool is_integrated_gpu;
|
||||||
|
+ // Combined string id in the form "dddd:bb:dd.f" (domain:bus:device.function)
|
||||||
|
+ std::string pci_id;
|
||||||
|
+ std::string id;
|
||||||
|
+ std::string uuid;
|
||||||
|
+ std::string numeric_id;
|
||||||
|
+ int major;
|
||||||
|
+ int minor;
|
||||||
|
+ int driver_major;
|
||||||
|
+ int driver_minor;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
|
||||||
|
+ GGML_ASSERT(ctx->device < (int) vk_instance.device_indices.size());
|
||||||
|
+ GGML_ASSERT(ctx->device < (int) vk_instance.device_supports_membudget.size());
|
||||||
|
+
|
||||||
|
+ vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[ctx->device]];
|
||||||
|
|
||||||
|
- vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
|
||||||
|
- vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
|
||||||
|
- vk::PhysicalDeviceMemoryProperties2 memprops = {};
|
||||||
|
- bool membudget_supported = vk_instance.device_supports_membudget[device];
|
||||||
|
+ vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
|
||||||
|
+ vk::PhysicalDeviceProperties2 props2;
|
||||||
|
+ vkdev.getProperties2(&props2);
|
||||||
|
|
||||||
|
- if (membudget_supported) {
|
||||||
|
- memprops.pNext = &budgetprops;
|
||||||
|
+ if (!ctx->is_integrated_gpu)
|
||||||
|
+ {
|
||||||
|
+ // Use vendor specific management libraries for best VRAM reporting if available
|
||||||
|
+ switch (props2.properties.vendorID) {
|
||||||
|
+ case VK_VENDOR_ID_AMD:
|
||||||
|
+ if (ggml_hip_mgmt_init() == 0) {
|
||||||
|
+ int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
|
||||||
|
+ if (status == 0) {
|
||||||
|
+ GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
|
||||||
|
+ ggml_hip_mgmt_release();
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ ggml_hip_mgmt_release();
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+ case VK_VENDOR_ID_NVIDIA:
|
||||||
|
+ if (ggml_nvml_init() == 0) {
|
||||||
|
+ int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
|
||||||
|
+ if (status == 0) {
|
||||||
|
+ GGML_LOG_DEBUG("%s device %s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, ctx->uuid.c_str(), *free, *total);
|
||||||
|
+ ggml_nvml_release();
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ ggml_nvml_release();
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
- vkdev.getMemoryProperties2(&memprops);
|
||||||
|
+ // else fallback to memory budget if supported
|
||||||
|
|
||||||
|
- for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
|
||||||
|
- const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
|
||||||
|
+ *total = 0;
|
||||||
|
+ *free = 0;
|
||||||
|
+ vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props;
|
||||||
|
+ vk::PhysicalDeviceMemoryProperties2 memprops2;
|
||||||
|
+ memprops2.pNext = &mem_budget_props;
|
||||||
|
+ vkdev.getMemoryProperties2(&memprops2);
|
||||||
|
+ for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
|
||||||
|
+ if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
||||||
|
+ *total += memprops2.memoryProperties.memoryHeaps[i].size;
|
||||||
|
+ } else if (ctx->is_integrated_gpu) {
|
||||||
|
+ // Include shared memory on iGPUs
|
||||||
|
+ *total += memprops2.memoryProperties.memoryHeaps[i].size;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
|
||||||
|
+ if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
||||||
|
+ *free += mem_budget_props.heapBudget[i];
|
||||||
|
+ } else if (ctx->is_integrated_gpu) {
|
||||||
|
+ *free += mem_budget_props.heapBudget[i];
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (*total > 0 && *free > 0) {
|
||||||
|
+ return;
|
||||||
|
+ } else if (*total > 0) {
|
||||||
|
+ *free = *total;
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
+ // else just report the physical memory
|
||||||
|
+ for (const vk::MemoryHeap& heap : memprops2.memoryProperties.memoryHeaps) {
|
||||||
|
if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
||||||
|
*total = heap.size;
|
||||||
|
-
|
||||||
|
- if (membudget_supported && i < budgetprops.heapUsage.size()) {
|
||||||
|
- *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
|
||||||
|
- } else {
|
||||||
|
- *free = heap.size;
|
||||||
|
- }
|
||||||
|
+ *free = heap.size;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -12448,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ vk::PhysicalDeviceProperties2 props2;
|
||||||
|
if (!ext_support) {
|
||||||
|
- return "";
|
||||||
|
+ device.getProperties2(&props2);
|
||||||
|
+ if (props2.properties.vendorID != VK_VENDOR_ID_AMD) {
|
||||||
|
+ return "";
|
||||||
|
+ }
|
||||||
|
+ // AMD doesn't claim to support PCI ID, but actually does, so try anyway and check for non-zero
|
||||||
|
}
|
||||||
|
|
||||||
|
vk::PhysicalDeviceProperties2 props = {};
|
||||||
|
@@ -12466,19 +12567,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
|
||||||
|
|
||||||
|
char pci_bus_id[16] = {};
|
||||||
|
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function);
|
||||||
|
+ if (pci_domain == 0 && pci_bus == 0 && pci_device == 0 && pci_function == 0) {
|
||||||
|
+ return "";
|
||||||
|
+ }
|
||||||
|
|
||||||
|
return std::string(pci_bus_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
-//////////////////////////
|
||||||
|
-
|
||||||
|
-struct ggml_backend_vk_device_context {
|
||||||
|
- size_t device;
|
||||||
|
- std::string name;
|
||||||
|
- std::string description;
|
||||||
|
- bool is_integrated_gpu;
|
||||||
|
- std::string pci_bus_id;
|
||||||
|
-};
|
||||||
|
+static bool ggml_backend_vk_parse_pci_bus_id(const std::string & id, int *domain, int *bus, int *device) {
|
||||||
|
+ if (id.empty()) return false;
|
||||||
|
+ unsigned int d = 0, b = 0, dev = 0, func = 0;
|
||||||
|
+ // Expected format: dddd:bb:dd.f (all hex)
|
||||||
|
+ int n = sscanf(id.c_str(), "%4x:%2x:%2x.%1x", &d, &b, &dev, &func);
|
||||||
|
+ if (n < 4) return false;
|
||||||
|
+ if (domain) *domain = (int) d;
|
||||||
|
+ if (bus) *bus = (int) b;
|
||||||
|
+ if (device) *device = (int) dev;
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
|
||||||
|
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
|
||||||
|
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
||||||
|
@@ -12490,9 +12596,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
|
||||||
|
return ctx->description.c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
+static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) {
|
||||||
|
+ ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
||||||
|
+ return ctx->id.c_str();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
|
||||||
|
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
|
||||||
|
- ggml_backend_vk_get_device_memory(ctx->device, free, total);
|
||||||
|
+ ggml_backend_vk_get_device_memory(ctx, free, total);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
|
||||||
|
@@ -12516,8 +12627,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
||||||
|
|
||||||
|
props->name = ggml_backend_vk_device_get_name(dev);
|
||||||
|
props->description = ggml_backend_vk_device_get_description(dev);
|
||||||
|
+ props->id = ggml_backend_vk_device_get_id(dev);
|
||||||
|
props->type = ggml_backend_vk_device_get_type(dev);
|
||||||
|
- props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
|
||||||
|
+ props->device_id = ctx->pci_id.empty() ? nullptr : ctx->pci_id.c_str();
|
||||||
|
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
||||||
|
props->caps = {
|
||||||
|
/* .async = */ false,
|
||||||
|
@@ -12525,6 +12637,14 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
||||||
|
/* .buffer_from_host_ptr = */ false,
|
||||||
|
/* .events = */ false,
|
||||||
|
};
|
||||||
|
+
|
||||||
|
+ props->compute_major = ctx->major;
|
||||||
|
+ props->compute_minor = ctx->minor;
|
||||||
|
+ props->driver_major = ctx->driver_major;
|
||||||
|
+ props->driver_minor = ctx->driver_minor;
|
||||||
|
+ props->integrated = ctx->is_integrated_gpu;
|
||||||
|
+ props->library = GGML_VK_NAME;
|
||||||
|
+ props->numeric_id = ctx->numeric_id.c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
|
||||||
|
@@ -12953,6 +13073,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||||
|
static std::mutex mutex;
|
||||||
|
std::lock_guard<std::mutex> lock(mutex);
|
||||||
|
if (!initialized) {
|
||||||
|
+ std::vector<vk::PhysicalDevice> vk_devices = vk_instance.instance.enumeratePhysicalDevices();
|
||||||
|
+
|
||||||
|
for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
|
||||||
|
ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
|
||||||
|
char desc[256];
|
||||||
|
@@ -12961,12 +13083,42 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||||
|
ctx->name = GGML_VK_NAME + std::to_string(i);
|
||||||
|
ctx->description = desc;
|
||||||
|
ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
|
||||||
|
- ctx->pci_bus_id = ggml_backend_vk_get_device_pci_id(i);
|
||||||
|
+ ctx->pci_id = ggml_backend_vk_get_device_pci_id(i);
|
||||||
|
+ ctx->id = ggml_backend_vk_get_device_id(i);
|
||||||
|
devices.push_back(new ggml_backend_device {
|
||||||
|
/* .iface = */ ggml_backend_vk_device_i,
|
||||||
|
/* .reg = */ reg,
|
||||||
|
/* .context = */ ctx,
|
||||||
|
});
|
||||||
|
+
|
||||||
|
+ // Gather additional information about the device
|
||||||
|
+ int dev_idx = vk_instance.device_indices[i];
|
||||||
|
+ vk::PhysicalDeviceProperties props1;
|
||||||
|
+ vk_devices[dev_idx].getProperties(&props1);
|
||||||
|
+ vk::PhysicalDeviceProperties2 props2;
|
||||||
|
+ vk::PhysicalDeviceIDProperties device_id_props;
|
||||||
|
+ vk::PhysicalDevicePCIBusInfoPropertiesEXT pci_bus_props;
|
||||||
|
+ vk::PhysicalDeviceDriverProperties driver_props;
|
||||||
|
+ props2.pNext = &device_id_props;
|
||||||
|
+ device_id_props.pNext = &pci_bus_props;
|
||||||
|
+ pci_bus_props.pNext = &driver_props;
|
||||||
|
+ vk_devices[dev_idx].getProperties2(&props2);
|
||||||
|
+ std::ostringstream oss;
|
||||||
|
+ oss << std::hex << std::setfill('0');
|
||||||
|
+ int byteIdx = 0;
|
||||||
|
+ for (int i = 0; i < 16; ++i, ++byteIdx) {
|
||||||
|
+ oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
|
||||||
|
+ if (byteIdx == 3 || byteIdx == 5 || byteIdx == 7 || byteIdx == 9) {
|
||||||
|
+ oss << '-';
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ ctx->uuid = oss.str();
|
||||||
|
+ ctx->major = 0;
|
||||||
|
+ ctx->minor = 0;
|
||||||
|
+ // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
|
||||||
|
+ ctx->driver_major = 0;
|
||||||
|
+ ctx->driver_minor = 0;
|
||||||
|
+ ctx->numeric_id = std::to_string(i);
|
||||||
|
}
|
||||||
|
initialized = true;
|
||||||
|
}
|
||||||
diff --git a/ggml/src/mem_hip.cpp b/ggml/src/mem_hip.cpp
|
diff --git a/ggml/src/mem_hip.cpp b/ggml/src/mem_hip.cpp
|
||||||
new file mode 100644
|
new file mode 100644
|
||||||
index 000000000..8ef19b8cf
|
index 000000000..5a7f5d465
|
||||||
--- /dev/null
|
--- /dev/null
|
||||||
+++ b/ggml/src/mem_hip.cpp
|
+++ b/ggml/src/mem_hip.cpp
|
||||||
@@ -0,0 +1,449 @@
|
@@ -0,0 +1,452 @@
|
||||||
+#include "ggml.h"
|
+#include "ggml.h"
|
||||||
+
|
+
|
||||||
+#ifdef _WIN32
|
+#ifdef _WIN32
|
||||||
|
|
@ -586,7 +890,7 @@ index 000000000..8ef19b8cf
|
||||||
+ if (gpus != NULL) gpus->pVtbl->Release(gpus); \
|
+ if (gpus != NULL) gpus->pVtbl->Release(gpus); \
|
||||||
+ if (gpu != NULL) gpu->pVtbl->Release(gpu)
|
+ if (gpu != NULL) gpu->pVtbl->Release(gpu)
|
||||||
+
|
+
|
||||||
+int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total) {
|
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
|
||||||
+ std::lock_guard<std::mutex> lock(ggml_adlx_lock);
|
+ std::lock_guard<std::mutex> lock(ggml_adlx_lock);
|
||||||
+ if (adlx.handle == NULL) {
|
+ if (adlx.handle == NULL) {
|
||||||
+ GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
|
+ GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
|
||||||
|
|
@ -598,9 +902,13 @@ index 000000000..8ef19b8cf
|
||||||
+ IADLXGPU* gpu = NULL;
|
+ IADLXGPU* gpu = NULL;
|
||||||
+ IADLXGPUMetrics *gpuMetrics = NULL;
|
+ IADLXGPUMetrics *gpuMetrics = NULL;
|
||||||
+ ADLX_RESULT status;
|
+ ADLX_RESULT status;
|
||||||
+ // The "UniqueID" exposed in ADLX is the PCI Bus and Device IDs
|
|
||||||
+ adlx_int target = (pci_bus_id << 8) | (pci_device_id & 0xff);
|
|
||||||
+
|
+
|
||||||
|
+ uint32_t pci_domain, pci_bus, pci_device, pci_function;
|
||||||
|
+ if (sscanf(id, "%04x:%02x:%02x.%x", &pci_domain, &pci_bus, &pci_device, &pci_function) != 4) {
|
||||||
|
+ // TODO - parse other formats?
|
||||||
|
+ GGML_LOG_DEBUG("%s device ID was not a PCI ID %s\n", __func__, id);
|
||||||
|
+ return ADLX_NOT_FOUND;
|
||||||
|
+ }
|
||||||
+ status = adlx.sys->pVtbl->GetPerformanceMonitoringServices(adlx.sys, &perfMonitoringServices);
|
+ status = adlx.sys->pVtbl->GetPerformanceMonitoringServices(adlx.sys, &perfMonitoringServices);
|
||||||
+ if (ADLX_FAILED(status)) {
|
+ if (ADLX_FAILED(status)) {
|
||||||
+ GGML_LOG_INFO("%s GetPerformanceMonitoringServices failed %d\n", __func__, status);
|
+ GGML_LOG_INFO("%s GetPerformanceMonitoringServices failed %d\n", __func__, status);
|
||||||
|
|
@ -623,16 +931,15 @@ index 000000000..8ef19b8cf
|
||||||
+ GGML_LOG_INFO("%s %d] At_GPUList failed %d\n", __func__, crt, status);
|
+ GGML_LOG_INFO("%s %d] At_GPUList failed %d\n", __func__, crt, status);
|
||||||
+ continue;
|
+ continue;
|
||||||
+ }
|
+ }
|
||||||
+ adlx_int id;
|
+ adlx_int uniqueID;
|
||||||
+ status = gpu->pVtbl->UniqueId(gpu, &id);
|
+ status = gpu->pVtbl->UniqueId(gpu, &uniqueID);
|
||||||
+ if (ADLX_FAILED(status)) {
|
+ if (ADLX_FAILED(status)) {
|
||||||
+ GGML_LOG_INFO("%s %d] UniqueId lookup failed %d\n", __func__, crt, status);
|
+ GGML_LOG_INFO("%s %d] UniqueId lookup failed %d\n", __func__, crt, status);
|
||||||
+ gpu->pVtbl->Release(gpu);
|
+ gpu->pVtbl->Release(gpu);
|
||||||
+ gpu = NULL;
|
+ gpu = NULL;
|
||||||
+ continue;
|
+ continue;
|
||||||
+ }
|
+ }
|
||||||
+ if (id != target) {
|
+ if ((((uniqueID >> 8) & 0xff) != pci_bus) || ((uniqueID & 0xff) != pci_device)) {
|
||||||
+ GGML_LOG_DEBUG("%s %d] GPU UniqueId: %x does not match target %02x %02x\n", __func__, crt, id, pci_bus_id, pci_device_id);
|
|
||||||
+ gpu->pVtbl->Release(gpu);
|
+ gpu->pVtbl->Release(gpu);
|
||||||
+ gpu = NULL;
|
+ gpu = NULL;
|
||||||
+ continue;
|
+ continue;
|
||||||
|
|
@ -695,7 +1002,7 @@ index 000000000..8ef19b8cf
|
||||||
+ return -1;
|
+ return -1;
|
||||||
+}
|
+}
|
||||||
+void ggml_hip_mgmt_release() {}
|
+void ggml_hip_mgmt_release() {}
|
||||||
+int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total) {
|
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
|
||||||
+ return -1;
|
+ return -1;
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ Subject: [PATCH] NVML fallback for unified memory GPUs
|
||||||
1 file changed, 68 insertions(+), 3 deletions(-)
|
1 file changed, 68 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
diff --git a/ggml/src/mem_nvml.cpp b/ggml/src/mem_nvml.cpp
|
diff --git a/ggml/src/mem_nvml.cpp b/ggml/src/mem_nvml.cpp
|
||||||
index c9073cef..f473a2a2 100644
|
index c9073cef0..f473a2a2c 100644
|
||||||
--- a/ggml/src/mem_nvml.cpp
|
--- a/ggml/src/mem_nvml.cpp
|
||||||
+++ b/ggml/src/mem_nvml.cpp
|
+++ b/ggml/src/mem_nvml.cpp
|
||||||
@@ -13,6 +13,7 @@
|
@@ -13,6 +13,7 @@
|
||||||
|
|
@ -1,95 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Xiaodong Ye <xiaodong.ye@mthreads.com>
|
|
||||||
Date: Mon, 18 Aug 2025 12:48:07 +0800
|
|
||||||
Subject: [PATCH] vulkan: get GPU ID (ollama v0.11.5)
|
|
||||||
|
|
||||||
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
|
|
||||||
---
|
|
||||||
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 37 ++++++++++++++++++++++++++++
|
|
||||||
1 file changed, 37 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
||||||
index 061cd078..adea7783 100644
|
|
||||||
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
||||||
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
||||||
@@ -11588,6 +11588,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_
|
|
||||||
snprintf(description, description_size, "%s", props.deviceName.data());
|
|
||||||
}
|
|
||||||
|
|
||||||
+static std::string ggml_vk_get_device_id(int device) {
|
|
||||||
+ ggml_vk_instance_init();
|
|
||||||
+
|
|
||||||
+ std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
|
|
||||||
+
|
|
||||||
+ vk::PhysicalDeviceProperties2 props;
|
|
||||||
+ vk::PhysicalDeviceIDProperties deviceIDProps;
|
|
||||||
+ props.pNext = &deviceIDProps;
|
|
||||||
+ devices[device].getProperties2(&props);
|
|
||||||
+
|
|
||||||
+ const auto& uuid = deviceIDProps.deviceUUID;
|
|
||||||
+ char id[64];
|
|
||||||
+ snprintf(id, sizeof(id),
|
|
||||||
+ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
|
||||||
+ uuid[0], uuid[1], uuid[2], uuid[3],
|
|
||||||
+ uuid[4], uuid[5],
|
|
||||||
+ uuid[6], uuid[7],
|
|
||||||
+ uuid[8], uuid[9],
|
|
||||||
+ uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]
|
|
||||||
+ );
|
|
||||||
+ return std::string(id);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// backend interface
|
|
||||||
|
|
||||||
#define UNUSED GGML_UNUSED
|
|
||||||
@@ -12394,6 +12417,12 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
|
|
||||||
ggml_vk_get_device_description(dev_idx, description, description_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
+std::string ggml_backend_vk_get_device_id(int device) {
|
|
||||||
+ GGML_ASSERT(device < (int) vk_instance.device_indices.size());
|
|
||||||
+ int dev_idx = vk_instance.device_indices[device];
|
|
||||||
+ return ggml_vk_get_device_id(dev_idx);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
|
|
||||||
GGML_ASSERT(device < (int) vk_instance.device_indices.size());
|
|
||||||
GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
|
|
||||||
@@ -12481,6 +12510,7 @@ struct ggml_backend_vk_device_context {
|
|
||||||
std::string description;
|
|
||||||
bool is_integrated_gpu;
|
|
||||||
std::string pci_bus_id;
|
|
||||||
+ std::string id;
|
|
||||||
};
|
|
||||||
|
|
||||||
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
|
|
||||||
@@ -12493,6 +12523,11 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
|
|
||||||
return ctx->description.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
+static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) {
|
|
||||||
+ ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
|
||||||
+ return ctx->id.c_str();
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
|
|
||||||
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
|
|
||||||
ggml_backend_vk_get_device_memory(ctx->device, free, total);
|
|
||||||
@@ -12519,6 +12554,7 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
|
||||||
|
|
||||||
props->name = ggml_backend_vk_device_get_name(dev);
|
|
||||||
props->description = ggml_backend_vk_device_get_description(dev);
|
|
||||||
+ props->id = ggml_backend_vk_device_get_id(dev);
|
|
||||||
props->type = ggml_backend_vk_device_get_type(dev);
|
|
||||||
props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
|
|
||||||
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
|
||||||
@@ -12965,6 +13001,7 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
|
||||||
ctx->description = desc;
|
|
||||||
ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
|
|
||||||
ctx->pci_bus_id = ggml_backend_vk_get_device_pci_id(i);
|
|
||||||
+ ctx->id = ggml_backend_vk_get_device_id(i);
|
|
||||||
devices.push_back(new ggml_backend_device {
|
|
||||||
/* .iface = */ ggml_backend_vk_device_i,
|
|
||||||
/* .reg = */ reg,
|
|
||||||
--
|
|
||||||
2.51.0
|
|
||||||
|
|
@ -28,7 +28,7 @@ Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
|
||||||
1 file changed, 9 insertions(+)
|
1 file changed, 9 insertions(+)
|
||||||
|
|
||||||
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||||
index 6a278b5e9..87941f872 100644
|
index b075a18be..d62f412d6 100644
|
||||||
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||||
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||||
@@ -340,6 +340,15 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
@@ -340,6 +340,15 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||||
|
|
@ -1,254 +0,0 @@
|
||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Daniel Hiltgen <daniel@ollama.com>
|
|
||||||
Date: Fri Sep 5 08:25:03 2025 -0700
|
|
||||||
Subject: [PATCH] Vulkan PCI and Memory
|
|
||||||
|
|
||||||
---
|
|
||||||
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 176 ++++++++++++++++++++++-----
|
|
||||||
1 file changed, 145 insertions(+), 31 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
||||||
index adea7783..fb7204ce 100644
|
|
||||||
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
||||||
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
||||||
@@ -12423,31 +12423,99 @@ std::string ggml_backend_vk_get_device_id(int device) {
|
|
||||||
return ggml_vk_get_device_id(dev_idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
-void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
|
|
||||||
- GGML_ASSERT(device < (int) vk_instance.device_indices.size());
|
|
||||||
- GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
|
|
||||||
+//////////////////////////
|
|
||||||
+
|
|
||||||
+struct ggml_backend_vk_device_context {
|
|
||||||
+ size_t device;
|
|
||||||
+ std::string name;
|
|
||||||
+ std::string description;
|
|
||||||
+ bool is_integrated_gpu;
|
|
||||||
+ // Combined string id in the form "dddd:bb:dd.f" (domain:bus:device.function)
|
|
||||||
+ std::string pci_id;
|
|
||||||
+ std::string id;
|
|
||||||
+ std::string uuid;
|
|
||||||
+ int major;
|
|
||||||
+ int minor;
|
|
||||||
+ int driver_major;
|
|
||||||
+ int driver_minor;
|
|
||||||
+ int pci_bus_id;
|
|
||||||
+ int pci_device_id;
|
|
||||||
+ int pci_domain_id;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
|
|
||||||
+ GGML_ASSERT(ctx->device < (int) vk_instance.device_indices.size());
|
|
||||||
+ GGML_ASSERT(ctx->device < (int) vk_instance.device_supports_membudget.size());
|
|
||||||
+
|
|
||||||
+ vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[ctx->device]];
|
|
||||||
|
|
||||||
- vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
|
|
||||||
- vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
|
|
||||||
- vk::PhysicalDeviceMemoryProperties2 memprops = {};
|
|
||||||
- bool membudget_supported = vk_instance.device_supports_membudget[device];
|
|
||||||
+ vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
|
|
||||||
+ vk::PhysicalDeviceProperties2 props2;
|
|
||||||
+ vkdev.getProperties2(&props2);
|
|
||||||
|
|
||||||
- if (membudget_supported) {
|
|
||||||
- memprops.pNext = &budgetprops;
|
|
||||||
+ if (!ctx->is_integrated_gpu)
|
|
||||||
+ {
|
|
||||||
+ // Use vendor specific management libraries for best VRAM reporting if available
|
|
||||||
+ switch (props2.properties.vendorID) {
|
|
||||||
+ case VK_VENDOR_ID_AMD:
|
|
||||||
+ if (ggml_hip_mgmt_init() == 0) {
|
|
||||||
+ int status = ggml_hip_get_device_memory(ctx->pci_bus_id, ctx->pci_device_id, free, total);
|
|
||||||
+ if (status == 0) {
|
|
||||||
+ GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
|
|
||||||
+ ggml_hip_mgmt_release();
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ ggml_hip_mgmt_release();
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+ case VK_VENDOR_ID_NVIDIA:
|
|
||||||
+ if (ggml_nvml_init() == 0) {
|
|
||||||
+ int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
|
|
||||||
+ if (status == 0) {
|
|
||||||
+ GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
|
|
||||||
+ ggml_nvml_release();
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ ggml_nvml_release();
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
- vkdev.getMemoryProperties2(&memprops);
|
|
||||||
+ // else fallback to memory budget if supported
|
|
||||||
|
|
||||||
- for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
|
|
||||||
- const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
|
|
||||||
+ *total = 0;
|
|
||||||
+ *free = 0;
|
|
||||||
+ vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props;
|
|
||||||
+ vk::PhysicalDeviceMemoryProperties2 memprops2;
|
|
||||||
+ memprops2.pNext = &mem_budget_props;
|
|
||||||
+ vkdev.getMemoryProperties2(&memprops2);
|
|
||||||
+ for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
|
|
||||||
+ if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
|
||||||
+ *total += memprops2.memoryProperties.memoryHeaps[i].size;
|
|
||||||
+ } else if (ctx->is_integrated_gpu) {
|
|
||||||
+ // Include shared memory on iGPUs
|
|
||||||
+ *total += memprops2.memoryProperties.memoryHeaps[i].size;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ for (int i = 0; i < memprops2.memoryProperties.memoryHeapCount; i++) {
|
|
||||||
+ if (memprops2.memoryProperties.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
|
||||||
+ *free += mem_budget_props.heapBudget[i];
|
|
||||||
+ } else if (ctx->is_integrated_gpu) {
|
|
||||||
+ *free += mem_budget_props.heapBudget[i];
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ if (*total > 0 && *free > 0) {
|
|
||||||
+ return;
|
|
||||||
+ } else if (*total > 0) {
|
|
||||||
+ *free = *total;
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
+ // else just report the physical memory
|
|
||||||
+ for (const vk::MemoryHeap& heap : memprops2.memoryProperties.memoryHeaps) {
|
|
||||||
if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
|
||||||
*total = heap.size;
|
|
||||||
-
|
|
||||||
- if (membudget_supported && i < budgetprops.heapUsage.size()) {
|
|
||||||
- *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
|
|
||||||
- } else {
|
|
||||||
- *free = heap.size;
|
|
||||||
- }
|
|
||||||
+ *free = heap.size;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -12502,16 +12570,17 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
|
|
||||||
return std::string(pci_bus_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
-//////////////////////////
|
|
||||||
-
|
|
||||||
-struct ggml_backend_vk_device_context {
|
|
||||||
- size_t device;
|
|
||||||
- std::string name;
|
|
||||||
- std::string description;
|
|
||||||
- bool is_integrated_gpu;
|
|
||||||
- std::string pci_bus_id;
|
|
||||||
- std::string id;
|
|
||||||
-};
|
|
||||||
+static bool ggml_backend_vk_parse_pci_bus_id(const std::string & id, int *domain, int *bus, int *device) {
|
|
||||||
+ if (id.empty()) return false;
|
|
||||||
+ unsigned int d = 0, b = 0, dev = 0, func = 0;
|
|
||||||
+ // Expected format: dddd:bb:dd.f (all hex)
|
|
||||||
+ int n = sscanf(id.c_str(), "%4x:%2x:%2x.%1x", &d, &b, &dev, &func);
|
|
||||||
+ if (n < 4) return false;
|
|
||||||
+ if (domain) *domain = (int) d;
|
|
||||||
+ if (bus) *bus = (int) b;
|
|
||||||
+ if (device) *device = (int) dev;
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
|
|
||||||
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
|
|
||||||
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
|
||||||
@@ -12530,7 +12599,7 @@ static const char * ggml_backend_vk_device_get_id(ggml_backend_dev_t dev) {
|
|
||||||
|
|
||||||
static void ggml_backend_vk_device_get_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
|
|
||||||
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)device->context;
|
|
||||||
- ggml_backend_vk_get_device_memory(ctx->device, free, total);
|
|
||||||
+ ggml_backend_vk_get_device_memory(ctx, free, total);
|
|
||||||
}
|
|
||||||
|
|
||||||
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
|
|
||||||
@@ -12556,7 +12625,7 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
|
||||||
props->description = ggml_backend_vk_device_get_description(dev);
|
|
||||||
props->id = ggml_backend_vk_device_get_id(dev);
|
|
||||||
props->type = ggml_backend_vk_device_get_type(dev);
|
|
||||||
- props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
|
|
||||||
+ props->device_id = ctx->pci_id.empty() ? nullptr : ctx->pci_id.c_str();
|
|
||||||
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
|
||||||
props->caps = {
|
|
||||||
/* .async = */ false,
|
|
||||||
@@ -12564,6 +12633,17 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
|
||||||
/* .buffer_from_host_ptr = */ false,
|
|
||||||
/* .events = */ false,
|
|
||||||
};
|
|
||||||
+
|
|
||||||
+ props->compute_major = ctx->major;
|
|
||||||
+ props->compute_minor = ctx->minor;
|
|
||||||
+ props->driver_major = ctx->driver_major;
|
|
||||||
+ props->driver_minor = ctx->driver_minor;
|
|
||||||
+ props->integrated = ctx->is_integrated_gpu;
|
|
||||||
+ props->pci_bus_id = ctx->pci_bus_id;
|
|
||||||
+ props->pci_device_id = ctx->pci_device_id;
|
|
||||||
+ props->pci_domain_id = ctx->pci_domain_id;
|
|
||||||
+ props->library = GGML_VK_NAME;
|
|
||||||
+ props->numeric_id = ctx->id.empty() ? nullptr : ctx->id.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
|
|
||||||
@@ -12992,6 +13071,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
|
||||||
static std::mutex mutex;
|
|
||||||
std::lock_guard<std::mutex> lock(mutex);
|
|
||||||
if (!initialized) {
|
|
||||||
+ std::vector<vk::PhysicalDevice> vk_devices = vk_instance.instance.enumeratePhysicalDevices();
|
|
||||||
+
|
|
||||||
for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
|
|
||||||
ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
|
|
||||||
char desc[256];
|
|
||||||
@@ -13000,13 +13081,46 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
|
||||||
ctx->name = GGML_VK_NAME + std::to_string(i);
|
|
||||||
ctx->description = desc;
|
|
||||||
ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
|
|
||||||
- ctx->pci_bus_id = ggml_backend_vk_get_device_pci_id(i);
|
|
||||||
+ ctx->pci_id = ggml_backend_vk_get_device_pci_id(i);
|
|
||||||
ctx->id = ggml_backend_vk_get_device_id(i);
|
|
||||||
devices.push_back(new ggml_backend_device {
|
|
||||||
/* .iface = */ ggml_backend_vk_device_i,
|
|
||||||
/* .reg = */ reg,
|
|
||||||
/* .context = */ ctx,
|
|
||||||
});
|
|
||||||
+
|
|
||||||
+ // Gather additional information about the device
|
|
||||||
+ int dev_idx = vk_instance.device_indices[i];
|
|
||||||
+ vk::PhysicalDeviceProperties props1;
|
|
||||||
+ vk_devices[dev_idx].getProperties(&props1);
|
|
||||||
+ vk::PhysicalDeviceProperties2 props2;
|
|
||||||
+ vk::PhysicalDeviceIDProperties device_id_props;
|
|
||||||
+ vk::PhysicalDevicePCIBusInfoPropertiesEXT pci_bus_props;
|
|
||||||
+ vk::PhysicalDeviceDriverProperties driver_props;
|
|
||||||
+ props2.pNext = &device_id_props;
|
|
||||||
+ device_id_props.pNext = &pci_bus_props;
|
|
||||||
+ pci_bus_props.pNext = &driver_props;
|
|
||||||
+ vk_devices[dev_idx].getProperties2(&props2);
|
|
||||||
+ std::ostringstream oss;
|
|
||||||
+ oss << std::hex << std::setfill('0');
|
|
||||||
+ oss << "GPU-";
|
|
||||||
+ int byteIdx = 0;
|
|
||||||
+ for (int i = 0; i < 16; ++i, ++byteIdx) {
|
|
||||||
+ oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
|
|
||||||
+ if (byteIdx == 3 || byteIdx == 5 || byteIdx == 7 || byteIdx == 9) {
|
|
||||||
+ oss << '-';
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ ctx->uuid = oss.str();
|
|
||||||
+ ctx->pci_bus_id = pci_bus_props.pciBus;
|
|
||||||
+ ctx->pci_device_id = pci_bus_props.pciDevice;
|
|
||||||
+ ctx->pci_domain_id = pci_bus_props.pciDomain;
|
|
||||||
+ ctx->id = std::to_string(i);
|
|
||||||
+ ctx->major = 0;
|
|
||||||
+ ctx->minor = 0;
|
|
||||||
+ // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
|
|
||||||
+ ctx->driver_major = 0;
|
|
||||||
+ ctx->driver_minor = 0;
|
|
||||||
}
|
|
||||||
initialized = true;
|
|
||||||
}
|
|
||||||
--
|
|
||||||
2.51.0
|
|
||||||
|
|
@ -725,7 +725,9 @@ func (b *Backend) BackendDevices() []ml.DeviceInfo {
|
||||||
if props.library != nil {
|
if props.library != nil {
|
||||||
info.Library = C.GoString(props.library)
|
info.Library = C.GoString(props.library)
|
||||||
}
|
}
|
||||||
info.PCIID = fmt.Sprintf("%02x:%02x.%x", props.pci_bus_id, props.pci_device_id, props.pci_domain_id)
|
if props.device_id != nil {
|
||||||
|
info.PCIID = C.GoString(props.device_id)
|
||||||
|
}
|
||||||
info.LibraryPath = ggml.LibPaths()
|
info.LibraryPath = ggml.LibPaths()
|
||||||
if props.numeric_id != nil {
|
if props.numeric_id != nil {
|
||||||
info.FilteredID = C.GoString(props.numeric_id)
|
info.FilteredID = C.GoString(props.numeric_id)
|
||||||
|
|
|
||||||
3
ml/backend/ggml/ggml/include/ggml-backend.h
vendored
3
ml/backend/ggml/ggml/include/ggml-backend.h
vendored
|
|
@ -174,9 +174,6 @@ extern "C" {
|
||||||
int compute_major;
|
int compute_major;
|
||||||
int compute_minor;
|
int compute_minor;
|
||||||
int integrated;
|
int integrated;
|
||||||
int pci_bus_id;
|
|
||||||
int pci_device_id;
|
|
||||||
int pci_domain_id;
|
|
||||||
const char *library;
|
const char *library;
|
||||||
// number with which the devices are accessed (Vulkan)
|
// number with which the devices are accessed (Vulkan)
|
||||||
const char *numeric_id;
|
const char *numeric_id;
|
||||||
|
|
|
||||||
15
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
vendored
15
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
vendored
|
|
@ -3513,9 +3513,6 @@ struct ggml_backend_cuda_device_context {
|
||||||
int driver_major;
|
int driver_major;
|
||||||
int driver_minor;
|
int driver_minor;
|
||||||
int integrated;
|
int integrated;
|
||||||
int pciBusID;
|
|
||||||
int pciDeviceID;
|
|
||||||
int pciDomainID;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
|
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
|
||||||
|
|
@ -3539,9 +3536,9 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
|
||||||
|
|
||||||
#if defined(GGML_USE_HIP)
|
#if defined(GGML_USE_HIP)
|
||||||
if (ggml_hip_mgmt_init() == 0) {
|
if (ggml_hip_mgmt_init() == 0) {
|
||||||
int status = ggml_hip_get_device_memory(ctx->pciBusID, ctx->pciDeviceID, free, total);
|
int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
|
||||||
if (status == 0) {
|
if (status == 0) {
|
||||||
GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
|
GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
|
||||||
ggml_hip_mgmt_release();
|
ggml_hip_mgmt_release();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -3551,7 +3548,7 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
|
||||||
if (ggml_nvml_init() == 0) {
|
if (ggml_nvml_init() == 0) {
|
||||||
int status = ggml_nvml_get_device_memory(ctx->id.c_str(), free, total);
|
int status = ggml_nvml_get_device_memory(ctx->id.c_str(), free, total);
|
||||||
if (status == 0) {
|
if (status == 0) {
|
||||||
GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
|
GGML_LOG_DEBUG("%s device %s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, ctx->id.c_str(), *free, *total);
|
||||||
ggml_nvml_release();
|
ggml_nvml_release();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -3591,9 +3588,6 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
|
||||||
props->driver_major = ctx->driver_major;
|
props->driver_major = ctx->driver_major;
|
||||||
props->driver_minor = ctx->driver_minor;
|
props->driver_minor = ctx->driver_minor;
|
||||||
props->integrated = ctx->integrated;
|
props->integrated = ctx->integrated;
|
||||||
props->pci_bus_id = ctx->pciBusID;
|
|
||||||
props->pci_device_id = ctx->pciDeviceID;
|
|
||||||
props->pci_domain_id = ctx->pciDomainID;
|
|
||||||
props->library = GGML_CUDA_NAME;
|
props->library = GGML_CUDA_NAME;
|
||||||
|
|
||||||
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
|
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
|
||||||
|
|
@ -4182,9 +4176,6 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||||
dev_ctx->driver_major = driverVersion / 1000;
|
dev_ctx->driver_major = driverVersion / 1000;
|
||||||
dev_ctx->driver_minor = (driverVersion - (dev_ctx->driver_major * 1000)) / 10;
|
dev_ctx->driver_minor = (driverVersion - (dev_ctx->driver_major * 1000)) / 10;
|
||||||
dev_ctx->integrated = prop.integrated;
|
dev_ctx->integrated = prop.integrated;
|
||||||
dev_ctx->pciBusID = prop.pciBusID;
|
|
||||||
dev_ctx->pciDeviceID = prop.pciDeviceID;
|
|
||||||
dev_ctx->pciDomainID = prop.pciDomainID;
|
|
||||||
ggml_backend_dev_t dev = new ggml_backend_device {
|
ggml_backend_dev_t dev = new ggml_backend_device {
|
||||||
/* .iface = */ ggml_backend_cuda_device_interface,
|
/* .iface = */ ggml_backend_cuda_device_interface,
|
||||||
/* .reg = */ &reg,
|
/* .reg = */ &reg,
|
||||||
|
|
|
||||||
2
ml/backend/ggml/ggml/src/ggml-impl.h
vendored
2
ml/backend/ggml/ggml/src/ggml-impl.h
vendored
|
|
@ -643,7 +643,7 @@ GGML_API int ggml_nvml_init();
|
||||||
GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
|
GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
|
||||||
GGML_API void ggml_nvml_release();
|
GGML_API void ggml_nvml_release();
|
||||||
GGML_API int ggml_hip_mgmt_init();
|
GGML_API int ggml_hip_mgmt_init();
|
||||||
GGML_API int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total);
|
GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
|
||||||
GGML_API void ggml_hip_mgmt_release();
|
GGML_API void ggml_hip_mgmt_release();
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
|
|
@ -231,6 +231,7 @@ class vk_memory_logger;
|
||||||
#endif
|
#endif
|
||||||
class vk_perf_logger;
|
class vk_perf_logger;
|
||||||
static void ggml_vk_destroy_buffer(vk_buffer& buf);
|
static void ggml_vk_destroy_buffer(vk_buffer& buf);
|
||||||
|
static std::string ggml_vk_get_device_id(int device);
|
||||||
|
|
||||||
static constexpr uint32_t mul_mat_vec_max_cols = 8;
|
static constexpr uint32_t mul_mat_vec_max_cols = 8;
|
||||||
static constexpr uint32_t p021_max_gqa_ratio = 8;
|
static constexpr uint32_t p021_max_gqa_ratio = 8;
|
||||||
|
|
@ -11598,7 +11599,7 @@ static std::string ggml_vk_get_device_id(int device) {
|
||||||
const auto& uuid = deviceIDProps.deviceUUID;
|
const auto& uuid = deviceIDProps.deviceUUID;
|
||||||
char id[64];
|
char id[64];
|
||||||
snprintf(id, sizeof(id),
|
snprintf(id, sizeof(id),
|
||||||
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
"%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||||
uuid[0], uuid[1], uuid[2], uuid[3],
|
uuid[0], uuid[1], uuid[2], uuid[3],
|
||||||
uuid[4], uuid[5],
|
uuid[4], uuid[5],
|
||||||
uuid[6], uuid[7],
|
uuid[6], uuid[7],
|
||||||
|
|
@ -12431,13 +12432,11 @@ struct ggml_backend_vk_device_context {
|
||||||
std::string pci_id;
|
std::string pci_id;
|
||||||
std::string id;
|
std::string id;
|
||||||
std::string uuid;
|
std::string uuid;
|
||||||
|
std::string numeric_id;
|
||||||
int major;
|
int major;
|
||||||
int minor;
|
int minor;
|
||||||
int driver_major;
|
int driver_major;
|
||||||
int driver_minor;
|
int driver_minor;
|
||||||
int pci_bus_id;
|
|
||||||
int pci_device_id;
|
|
||||||
int pci_domain_id;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
|
void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size_t * free, size_t * total) {
|
||||||
|
|
@ -12456,9 +12455,9 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
|
||||||
switch (props2.properties.vendorID) {
|
switch (props2.properties.vendorID) {
|
||||||
case VK_VENDOR_ID_AMD:
|
case VK_VENDOR_ID_AMD:
|
||||||
if (ggml_hip_mgmt_init() == 0) {
|
if (ggml_hip_mgmt_init() == 0) {
|
||||||
int status = ggml_hip_get_device_memory(ctx->pci_bus_id, ctx->pci_device_id, free, total);
|
int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
|
||||||
if (status == 0) {
|
if (status == 0) {
|
||||||
GGML_LOG_DEBUG("%s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, *free, *total);
|
GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
|
||||||
ggml_hip_mgmt_release();
|
ggml_hip_mgmt_release();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -12469,7 +12468,7 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
|
||||||
if (ggml_nvml_init() == 0) {
|
if (ggml_nvml_init() == 0) {
|
||||||
int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
|
int status = ggml_nvml_get_device_memory(ctx->uuid.c_str(), free, total);
|
||||||
if (status == 0) {
|
if (status == 0) {
|
||||||
GGML_LOG_DEBUG("%s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, *free, *total);
|
GGML_LOG_DEBUG("%s device %s utilizing NVML memory reporting free: %zu total: %zu\n", __func__, ctx->uuid.c_str(), *free, *total);
|
||||||
ggml_nvml_release();
|
ggml_nvml_release();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -12545,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vk::PhysicalDeviceProperties2 props2;
|
||||||
if (!ext_support) {
|
if (!ext_support) {
|
||||||
return "";
|
device.getProperties2(&props2);
|
||||||
|
if (props2.properties.vendorID != VK_VENDOR_ID_AMD) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
// AMD doesn't claim to support PCI ID, but actually does, so try anyway and check for non-zero
|
||||||
}
|
}
|
||||||
|
|
||||||
vk::PhysicalDeviceProperties2 props = {};
|
vk::PhysicalDeviceProperties2 props = {};
|
||||||
|
|
@ -12563,6 +12567,9 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
|
||||||
|
|
||||||
char pci_bus_id[16] = {};
|
char pci_bus_id[16] = {};
|
||||||
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function);
|
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function);
|
||||||
|
if (pci_domain == 0 && pci_bus == 0 && pci_device == 0 && pci_function == 0) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
return std::string(pci_bus_id);
|
return std::string(pci_bus_id);
|
||||||
}
|
}
|
||||||
|
|
@ -12636,11 +12643,8 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
||||||
props->driver_major = ctx->driver_major;
|
props->driver_major = ctx->driver_major;
|
||||||
props->driver_minor = ctx->driver_minor;
|
props->driver_minor = ctx->driver_minor;
|
||||||
props->integrated = ctx->is_integrated_gpu;
|
props->integrated = ctx->is_integrated_gpu;
|
||||||
props->pci_bus_id = ctx->pci_bus_id;
|
|
||||||
props->pci_device_id = ctx->pci_device_id;
|
|
||||||
props->pci_domain_id = ctx->pci_domain_id;
|
|
||||||
props->library = GGML_VK_NAME;
|
props->library = GGML_VK_NAME;
|
||||||
props->numeric_id = ctx->id.empty() ? nullptr : ctx->id.c_str();
|
props->numeric_id = ctx->numeric_id.c_str();
|
||||||
}
|
}
|
||||||
|
|
||||||
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
|
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
|
||||||
|
|
@ -13101,7 +13105,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||||
vk_devices[dev_idx].getProperties2(&props2);
|
vk_devices[dev_idx].getProperties2(&props2);
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << std::hex << std::setfill('0');
|
oss << std::hex << std::setfill('0');
|
||||||
oss << "GPU-";
|
|
||||||
int byteIdx = 0;
|
int byteIdx = 0;
|
||||||
for (int i = 0; i < 16; ++i, ++byteIdx) {
|
for (int i = 0; i < 16; ++i, ++byteIdx) {
|
||||||
oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
|
oss << std::setw(2) << static_cast<int>(device_id_props.deviceUUID[i]);
|
||||||
|
|
@ -13110,15 +13113,12 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ctx->uuid = oss.str();
|
ctx->uuid = oss.str();
|
||||||
ctx->pci_bus_id = pci_bus_props.pciBus;
|
|
||||||
ctx->pci_device_id = pci_bus_props.pciDevice;
|
|
||||||
ctx->pci_domain_id = pci_bus_props.pciDomain;
|
|
||||||
ctx->id = std::to_string(i);
|
|
||||||
ctx->major = 0;
|
ctx->major = 0;
|
||||||
ctx->minor = 0;
|
ctx->minor = 0;
|
||||||
// TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
|
// TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
|
||||||
ctx->driver_major = 0;
|
ctx->driver_major = 0;
|
||||||
ctx->driver_minor = 0;
|
ctx->driver_minor = 0;
|
||||||
|
ctx->numeric_id = std::to_string(i);
|
||||||
}
|
}
|
||||||
initialized = true;
|
initialized = true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
19
ml/backend/ggml/ggml/src/mem_hip.cpp
vendored
19
ml/backend/ggml/ggml/src/mem_hip.cpp
vendored
|
|
@ -331,7 +331,7 @@ void ggml_hip_mgmt_release() {
|
||||||
if (gpus != NULL) gpus->pVtbl->Release(gpus); \
|
if (gpus != NULL) gpus->pVtbl->Release(gpus); \
|
||||||
if (gpu != NULL) gpu->pVtbl->Release(gpu)
|
if (gpu != NULL) gpu->pVtbl->Release(gpu)
|
||||||
|
|
||||||
int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total) {
|
int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
|
||||||
std::lock_guard<std::mutex> lock(ggml_adlx_lock);
|
std::lock_guard<std::mutex> lock(ggml_adlx_lock);
|
||||||
if (adlx.handle == NULL) {
|
if (adlx.handle == NULL) {
|
||||||
GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
|
GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
|
||||||
|
|
@ -343,9 +343,13 @@ int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free,
|
||||||
IADLXGPU* gpu = NULL;
|
IADLXGPU* gpu = NULL;
|
||||||
IADLXGPUMetrics *gpuMetrics = NULL;
|
IADLXGPUMetrics *gpuMetrics = NULL;
|
||||||
ADLX_RESULT status;
|
ADLX_RESULT status;
|
||||||
// The "UniqueID" exposed in ADLX is the PCI Bus and Device IDs
|
|
||||||
adlx_int target = (pci_bus_id << 8) | (pci_device_id & 0xff);
|
|
||||||
|
|
||||||
|
uint32_t pci_domain, pci_bus, pci_device, pci_function;
|
||||||
|
if (sscanf(id, "%04x:%02x:%02x.%x", &pci_domain, &pci_bus, &pci_device, &pci_function) != 4) {
|
||||||
|
// TODO - parse other formats?
|
||||||
|
GGML_LOG_DEBUG("%s device ID was not a PCI ID %s\n", __func__, id);
|
||||||
|
return ADLX_NOT_FOUND;
|
||||||
|
}
|
||||||
status = adlx.sys->pVtbl->GetPerformanceMonitoringServices(adlx.sys, &perfMonitoringServices);
|
status = adlx.sys->pVtbl->GetPerformanceMonitoringServices(adlx.sys, &perfMonitoringServices);
|
||||||
if (ADLX_FAILED(status)) {
|
if (ADLX_FAILED(status)) {
|
||||||
GGML_LOG_INFO("%s GetPerformanceMonitoringServices failed %d\n", __func__, status);
|
GGML_LOG_INFO("%s GetPerformanceMonitoringServices failed %d\n", __func__, status);
|
||||||
|
|
@ -368,16 +372,15 @@ int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free,
|
||||||
GGML_LOG_INFO("%s %d] At_GPUList failed %d\n", __func__, crt, status);
|
GGML_LOG_INFO("%s %d] At_GPUList failed %d\n", __func__, crt, status);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
adlx_int id;
|
adlx_int uniqueID;
|
||||||
status = gpu->pVtbl->UniqueId(gpu, &id);
|
status = gpu->pVtbl->UniqueId(gpu, &uniqueID);
|
||||||
if (ADLX_FAILED(status)) {
|
if (ADLX_FAILED(status)) {
|
||||||
GGML_LOG_INFO("%s %d] UniqueId lookup failed %d\n", __func__, crt, status);
|
GGML_LOG_INFO("%s %d] UniqueId lookup failed %d\n", __func__, crt, status);
|
||||||
gpu->pVtbl->Release(gpu);
|
gpu->pVtbl->Release(gpu);
|
||||||
gpu = NULL;
|
gpu = NULL;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (id != target) {
|
if ((((uniqueID >> 8) & 0xff) != pci_bus) || ((uniqueID & 0xff) != pci_device)) {
|
||||||
GGML_LOG_DEBUG("%s %d] GPU UniqueId: %x does not match target %02x %02x\n", __func__, crt, id, pci_bus_id, pci_device_id);
|
|
||||||
gpu->pVtbl->Release(gpu);
|
gpu->pVtbl->Release(gpu);
|
||||||
gpu = NULL;
|
gpu = NULL;
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -440,7 +443,7 @@ int ggml_hip_mgmt_init() {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
void ggml_hip_mgmt_release() {}
|
void ggml_hip_mgmt_release() {}
|
||||||
int ggml_hip_get_device_memory(int pci_bus_id, int pci_device_id, size_t *free, size_t *total) {
|
int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -391,6 +391,10 @@ func (a DeviceInfo) Compare(b DeviceInfo) DeviceComparison {
|
||||||
if a.PCIID != b.PCIID {
|
if a.PCIID != b.PCIID {
|
||||||
return UniqueDevice
|
return UniqueDevice
|
||||||
}
|
}
|
||||||
|
// If PCIID is empty, we have to use ID + library for uniqueness
|
||||||
|
if a.PCIID == "" && a.DeviceID != b.DeviceID {
|
||||||
|
return UniqueDevice
|
||||||
|
}
|
||||||
if a.Library == b.Library {
|
if a.Library == b.Library {
|
||||||
return SameBackendDevice
|
return SameBackendDevice
|
||||||
}
|
}
|
||||||
|
|
@ -454,13 +458,13 @@ func (d DeviceInfo) updateVisibleDevicesEnv(env map[string]string) {
|
||||||
var envVar string
|
var envVar string
|
||||||
switch d.Library {
|
switch d.Library {
|
||||||
case "ROCm":
|
case "ROCm":
|
||||||
|
// ROCm must be filtered as it can crash the runner on unsupported devices
|
||||||
envVar = "ROCR_VISIBLE_DEVICES"
|
envVar = "ROCR_VISIBLE_DEVICES"
|
||||||
if runtime.GOOS != "linux" {
|
if runtime.GOOS != "linux" {
|
||||||
envVar = "HIP_VISIBLE_DEVICES"
|
envVar = "HIP_VISIBLE_DEVICES"
|
||||||
}
|
}
|
||||||
case "Vulkan":
|
|
||||||
envVar = "GGML_VK_VISIBLE_DEVICES"
|
|
||||||
default:
|
default:
|
||||||
|
// CUDA and Vulkan are not filtered via env var, but via scheduling decisions
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
v, existing := env[envVar]
|
v, existing := env[envVar]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user