libhsakmt: Implement per-context topology for multi-context KFD support (#2405)

This enhances libhsakmt's capabilities for multi-context KFD support by implementing per-context topology management.

Changes:
* Add hsaKmtGetClockCountersCtx for multi-context support
  - Add context-aware version of hsaKmtGetClockCounters
  - Original API is retained as a wrapper calling the ctx-version with primary context

* Enable independent debug sessions across multiple KFD contexts
  - Create hsa_kfd_debug_context, introduce context-aware debug APIs, and shift debug state to per-context storage

* Add perf sub-context for per-context performance counter management
  - Introduce hsa_kfd_perf_context, move counter properties, add context-aware perf APIs, and update initialization

* Refactor FMM for per-context resource management
  - Move multiple FMM-related global variables, including the GPU ID arrays,
    svm, cpuvm_aperture, and mem_handle_aperture, into hsa_kfd_fmm_context

* Implement per-context topology for complete context isolation
  - Migrate global topology data (g_system, g_props, map_user_to_sysfs_node_id)
     to per-context hsa_kfd_topology_context structure
  - Update all topology functions to accept HsaKFDContext parameter for
     context-aware operations (validate_nodeid, get_node_props, get_iolink_props, etc.)
  - Refactor topology snapshot management for per-context isolation
  - Add context-aware PMC trace access APIs

Signed-off-by: Junhua Shen <Junhua.Shen@amd.com>
This commit is contained in:
Junhua Shen
2026-01-30 09:42:25 +08:00
committato da GitHub
parent a838b0c07b
commit 0d98c3bdd5
20 ha cambiato i file con 998 aggiunte e 536 eliminazioni
@@ -32,6 +32,8 @@
extern "C" {
#endif
/* Forward declaration for debug trap ioctl arguments */
struct kfd_ioctl_dbg_trap_args;
/**
"Opens" the HSA kernel driver for user-kernel mode communication.
@@ -852,8 +854,10 @@ hsaKmtCheckRuntimeDebugSupport(
/**
Debug ops call primarily used for KFD testing
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDebugTrapIoctl(
	struct kfd_ioctl_dbg_trap_args *args,	//IN/OUT
	HSA_QUEUEID *Queues,			//IN
	HSAuint64 *DebugReturn			//OUT
	);
+170 -57
Vedi File
@@ -26,38 +26,82 @@
#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
static bool *is_device_debugged;
static uint32_t runtime_capabilities_mask = 0;
/*
* hsa_kfd_debug_context
*
* Represents the debug state for a KFD context.
* Each HsaKFDContext has its own independent debug context.
*/
struct hsa_kfd_debug_context {
	/*
	 * Per-node "is being debugged" flags, indexed by node id.
	 * Allocated by hsakmt_init_device_debugging_memory() and released by
	 * hsakmt_destroy_device_debugging_memory(); NULL when unavailable.
	 */
	bool *is_device_debugged;

	/* Capabilities mask stored after a successful runtime-enable ioctl */
	uint32_t runtime_capabilities_mask;
};
/*
 * Return the debug sub-context attached to @ctx, allocating a zero-filled
 * one the first time it is requested.
 *
 * Returns NULL (after logging an error) when @ctx is NULL or when the
 * allocation fails, so callers must check the result before dereferencing.
 */
struct hsa_kfd_debug_context *hsakmt_kfdcontext_get_debug_context(HsaKFDContext *ctx)
{
	assert(ctx);
	/* The explicit check is still required: assert() vanishes under NDEBUG */
	if (!ctx) {
		pr_err("Expected a non-null ptr for HsaKFDContext\n");
		return NULL;
	}

	if (!ctx->debug_context) {
		ctx->debug_context = calloc(1, sizeof(struct hsa_kfd_debug_context));
		if (!ctx->debug_context)
			pr_err("Alloc memory failed for struct hsa_kfd_debug_context size %zu\n",
			       sizeof(struct hsa_kfd_debug_context));
	}

	return ctx->debug_context;
}
/*
 * Allocate the per-node debug flag array of @ctx, one entry per node, with
 * every entry initially false.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when either the debug sub-context or the
 * flag array cannot be allocated, HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS hsakmt_init_device_debugging_memory(HsaKFDContext *ctx, unsigned int NumNodes)
{
	struct hsa_kfd_debug_context *debug_ctx = hsakmt_kfdcontext_get_debug_context(ctx);

	if (!debug_ctx)
		return HSAKMT_STATUS_NO_MEMORY;

	/*
	 * calloc() zero-fills the array (all flags false) and fails cleanly if
	 * NumNodes * sizeof(bool) would overflow, so no separate clear loop is
	 * needed.
	 */
	debug_ctx->is_device_debugged = calloc(NumNodes, sizeof(bool));
	if (!debug_ctx->is_device_debugged)
		return HSAKMT_STATUS_NO_MEMORY;

	return HSAKMT_STATUS_SUCCESS;
}
void hsakmt_destroy_device_debugging_memory(void)
/*
 * Release the per-node debug flag array of @ctx, if there is one.
 * The debug sub-context itself stays attached to @ctx.
 */
void hsakmt_destroy_device_debugging_memory(HsaKFDContext *ctx)
{
	/*
	 * Inspect the pointer directly: the lazy getter would allocate a debug
	 * context just so that it could be torn down again.
	 */
	if (!ctx || !ctx->debug_context)
		return;

	/* free(NULL) is a no-op, so no guard is needed */
	free(ctx->debug_context->is_device_debugged);
	ctx->debug_context->is_device_debugged = NULL;
}
bool hsakmt_debug_get_reg_status(uint32_t node_id)
/*
 * Report whether node @node_id is currently flagged as being debugged in
 * @ctx. Returns false when the debug sub-context or its flag array is
 * unavailable.
 */
bool hsakmt_debug_get_reg_status(HsaKFDContext *ctx, uint32_t node_id)
{
	struct hsa_kfd_debug_context *debug_ctx = hsakmt_kfdcontext_get_debug_context(ctx);

	if (!debug_ctx || !debug_ctx->is_device_debugged)
		return false;

	/*
	 * NOTE(review): node_id is not range-checked; the array length is not
	 * stored in the debug context, so callers must pass a valid node id.
	 */
	return debug_ctx->is_device_debugged[node_id];
}
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgRegister(HSAuint32 NodeId)
@@ -66,11 +110,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgRegister(HSAuint32 NodeId)
uint32_t gpu_id;
CHECK_KFD_OPEN();
if (!is_device_debugged)
struct hsa_kfd_debug_context *debug_ctx =
hsakmt_kfdcontext_get_debug_context(&hsakmt_primary_kfd_ctx);
if (!debug_ctx->is_device_debugged)
return HSAKMT_STATUS_NO_MEMORY;
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
result = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS)
return result;
@@ -94,11 +139,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgUnregister(HSAuint32 NodeId)
HSAKMT_STATUS result;
CHECK_KFD_OPEN();
if (!is_device_debugged)
struct hsa_kfd_debug_context *debug_ctx =
hsakmt_kfdcontext_get_debug_context(&hsakmt_primary_kfd_ctx);
if (!debug_ctx->is_device_debugged)
return HSAKMT_STATUS_NO_MEMORY;
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
result = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS)
return result;
@@ -126,7 +172,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgWavefrontControl(HSAuint32 NodeId,
CHECK_KFD_OPEN();
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
result = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS)
return result;
@@ -195,11 +241,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgAddressWatch(HSAuint32 NodeId,
uint32_t watch_event_items = WatchEvent != NULL ? NumWatchPoints:0;
struct kfd_ioctl_dbg_address_watch_args *args;
HSAuint32 i = 0;
HSAuint32 i = 0;
CHECK_KFD_OPEN();
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
result = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS)
return result;
@@ -268,19 +314,19 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgAddressWatch(HSAuint32 NodeId,
#define HSA_RUNTIME_ENABLE_MAX_MAJOR 1
#define HSA_RUNTIME_ENABLE_MIN_MINOR 13
HSAKMT_STATUS HSAKMTAPI hsaKmtCheckRuntimeDebugSupport(void) {
HSAKMT_STATUS HSAKMTAPI hsaKmtCheckRuntimeDebugSupportCtx(HsaKFDContext *ctx) {
HsaNodeProperties node = {0};
HsaSystemProperties props = {0};
HsaVersionInfo versionInfo = {0};
memset(&node, 0x00, sizeof(node));
memset(&props, 0x00, sizeof(props));
if (hsaKmtAcquireSystemProperties(&props))
if (hsaKmtAcquireSystemPropertiesCtx(ctx, &props))
return HSAKMT_STATUS_ERROR;
//the firmware of gpu node doesn't support the debugger, disable it.
for (uint32_t i = 0; i < props.NumNodes; i++) {
if (hsaKmtGetNodeProperties(i, &node))
if (hsaKmtGetNodePropertiesCtx(ctx, i, &node))
return HSAKMT_STATUS_ERROR;
//ignore cpu node
@@ -302,12 +348,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCheckRuntimeDebugSupport(void) {
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnableCtx(HsaKFDContext *ctx,
void *rDebug,
bool setupTtmp)
{
struct kfd_ioctl_runtime_enable_args args = {0};
HSAKMT_STATUS result = hsaKmtCheckRuntimeDebugSupport();
struct hsa_kfd_debug_context *debug_ctx = hsakmt_kfdcontext_get_debug_context(ctx);
struct kfd_ioctl_runtime_enable_args args = {0};
HSAKMT_STATUS result = hsaKmtCheckRuntimeDebugSupportCtx(ctx);
if (result)
return result;
@@ -316,7 +364,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
((setupTtmp) ? KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK : 0);
args.r_debug = (HSAuint64)rDebug;
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RUNTIME_ENABLE, &args);
long err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_RUNTIME_ENABLE, &args);
if (err) {
if (errno == EBUSY)
@@ -324,15 +372,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
else
return HSAKMT_STATUS_ERROR;
}
runtime_capabilities_mask= args.capabilities_mask;
debug_ctx->runtime_capabilities_mask= args.capabilities_mask;
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisable(void)
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisableCtx(HsaKFDContext *ctx)
{
struct kfd_ioctl_runtime_enable_args args = {0};
HSAKMT_STATUS result = hsaKmtCheckRuntimeDebugSupport();
HSAKMT_STATUS result = hsaKmtCheckRuntimeDebugSupportCtx(ctx);
if (result)
return result;
@@ -340,19 +388,23 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisable(void)
memset(&args, 0x00, sizeof(args));
args.mode_mask = 0; //Disable
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RUNTIME_ENABLE, &args))
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_RUNTIME_ENABLE, &args))
return HSAKMT_STATUS_ERROR;
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtGetRuntimeCapabilities(HSAuint32 *caps_mask)
/*
 * Copy the runtime capabilities mask stored in the debug state of @ctx into
 * *@caps_mask.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when @caps_mask is NULL,
 * HSAKMT_STATUS_NO_MEMORY when the debug sub-context is unavailable, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetRuntimeCapabilitiesCtx(HsaKFDContext *ctx,
							HSAuint32 *caps_mask)
{
	struct hsa_kfd_debug_context *debug_ctx;

	if (!caps_mask)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* The getter returns NULL on allocation failure; do not dereference blindly */
	debug_ctx = hsakmt_kfdcontext_get_debug_context(ctx);
	if (!debug_ctx)
		return HSAKMT_STATUS_NO_MEMORY;

	*caps_mask = debug_ctx->runtime_capabilities_mask;
	return HSAKMT_STATUS_SUCCESS;
}
static HSAKMT_STATUS dbg_trap_get_device_data(void *data,
static HSAKMT_STATUS dbg_trap_get_device_data(HsaKFDContext *ctx,
void *data,
uint32_t *n_entries,
uint32_t entry_size)
{
@@ -363,14 +415,15 @@ static HSAKMT_STATUS dbg_trap_get_device_data(void *data,
args.device_snapshot.entry_size = entry_size;
args.op = KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT;
args.pid = getpid();
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, &args))
return HSAKMT_STATUS_ERROR;
*n_entries = args.device_snapshot.num_devices;
return HSAKMT_STATUS_SUCCESS;
}
static HSAKMT_STATUS dbg_trap_get_queue_data(void *data,
static HSAKMT_STATUS dbg_trap_get_queue_data(HsaKFDContext *ctx,
void *data,
uint32_t *n_entries,
uint32_t entry_size,
uint32_t *queue_ids)
@@ -384,7 +437,7 @@ static HSAKMT_STATUS dbg_trap_get_queue_data(void *data,
args.queue_snapshot.snapshot_buf_ptr = (uint64_t) data;
args.pid = getpid();
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, &args))
return HSAKMT_STATUS_ERROR;
*n_entries = args.queue_snapshot.num_queues;
@@ -398,7 +451,8 @@ static HSAKMT_STATUS dbg_trap_get_queue_data(void *data,
return HSAKMT_STATUS_SUCCESS;
}
static HSAKMT_STATUS dbg_trap_suspend_queues(uint32_t *queue_ids,
static HSAKMT_STATUS dbg_trap_suspend_queues(HsaKFDContext *ctx,
uint32_t *queue_ids,
uint32_t num_queues)
{
struct kfd_ioctl_dbg_trap_args args = {0};
@@ -410,7 +464,7 @@ static HSAKMT_STATUS dbg_trap_suspend_queues(uint32_t *queue_ids,
args.op = KFD_IOC_DBG_TRAP_SUSPEND_QUEUES;
args.pid = getpid();
r = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args);
r = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, &args);
if (r < 0)
return HSAKMT_STATUS_ERROR;
@@ -420,7 +474,8 @@ static HSAKMT_STATUS dbg_trap_suspend_queues(uint32_t *queue_ids,
/* Debugger support has been in KFD ABI 1.13. */
#define KFD_MINOR_MIN_DEBUG 13
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnableCtx(HsaKFDContext *ctx,
void **runtime_info,
HSAuint32 *data_size)
{
struct kfd_ioctl_dbg_trap_args args = {0};
@@ -429,7 +484,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);
*data_size = sizeof(struct kfd_runtime_info);
args.enable.rinfo_size = *data_size;
args.enable.dbg_fd = hsakmt_primary_kfd_ctx.fd;
args.enable.dbg_fd = ctx->fd;
*runtime_info = malloc(args.enable.rinfo_size);
if (!*runtime_info)
return HSAKMT_STATUS_NO_MEMORY;
@@ -437,30 +492,31 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
args.op = KFD_IOC_DBG_TRAP_ENABLE;
args.pid = getpid();
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args)) {
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, &args)) {
free(*runtime_info);
return HSAKMT_STATUS_ERROR;
}
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgDisable(void)
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgDisableCtx(HsaKFDContext *ctx)
{
struct kfd_ioctl_dbg_trap_args args = {0};
CHECK_KFD_OPEN();
CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);
args.enable.dbg_fd = hsakmt_primary_kfd_ctx.fd;
args.enable.dbg_fd = ctx->fd;
args.op = KFD_IOC_DBG_TRAP_DISABLE;
args.pid = getpid();
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, &args))
return HSAKMT_STATUS_ERROR;
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetDeviceData(void **data,
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetDeviceDataCtx(HsaKFDContext *ctx,
void **data,
HSAuint32 *n_entries,
HSAuint32 *entry_size)
{
@@ -473,14 +529,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetDeviceData(void **data,
*data = malloc(*entry_size * *n_entries);
if (!*data)
return ret;
ret = dbg_trap_get_device_data(*data, n_entries, *entry_size);
ret = dbg_trap_get_device_data(ctx, *data, n_entries, *entry_size);
if (ret)
free(*data);
return ret;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueData(void **data,
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueDataCtx(HsaKFDContext *ctx,
void **data,
HSAuint32 *n_entries,
HSAuint32 *entry_size,
bool suspend_queues)
@@ -491,7 +548,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueData(void **data,
CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);
*entry_size = sizeof(struct kfd_queue_snapshot_entry);
*n_entries = 0;
if (dbg_trap_get_queue_data(NULL, n_entries, *entry_size, NULL))
if (dbg_trap_get_queue_data(ctx, NULL, n_entries, *entry_size, NULL))
return HSAKMT_STATUS_ERROR;
*data = malloc(*n_entries * *entry_size);
if (!*data)
@@ -499,11 +556,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueData(void **data,
if (suspend_queues && *n_entries)
queue_ids = (uint32_t *)malloc(sizeof(uint32_t) * *n_entries);
if (!queue_ids ||
dbg_trap_get_queue_data(*data, n_entries, *entry_size, queue_ids))
dbg_trap_get_queue_data(ctx, *data, n_entries, *entry_size, queue_ids))
goto free_data;
if (queue_ids) {
if (dbg_trap_suspend_queues(queue_ids, *n_entries) ||
dbg_trap_get_queue_data(*data, n_entries, *entry_size, NULL))
if (dbg_trap_suspend_queues(ctx, queue_ids, *n_entries) ||
dbg_trap_get_queue_data(ctx, *data, n_entries, *entry_size, NULL))
goto free_data;
free(queue_ids);
}
@@ -516,9 +573,10 @@ free_data:
return HSAKMT_STATUS_ERROR;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *args,
HSA_QUEUEID *Queues,
HSAuint64 *DebugReturn)
HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctlCtx(HsaKFDContext *ctx,
struct kfd_ioctl_dbg_trap_args *args,
HSA_QUEUEID *Queues,
HSAuint64 *DebugReturn)
{
HSAKMT_STATUS result;
@@ -540,7 +598,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *arg
free(queue_ids);
}
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, args);
long err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, args);
if (DebugReturn)
*DebugReturn = err;
@@ -557,3 +615,58 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *arg
return result;
}
/* Legacy entry point: runs the debug-support check on the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtCheckRuntimeDebugSupport(void)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtCheckRuntimeDebugSupportCtx(primary);
}
/* Legacy entry point: forwards the runtime-enable request to the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
					    bool setupTtmp)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRuntimeEnableCtx(primary, rDebug, setupTtmp);
}
/* Legacy entry point: forwards the runtime-disable request to the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisable(void)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRuntimeDisableCtx(primary);
}
/* Legacy entry point: reads the capabilities mask kept by the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetRuntimeCapabilities(HSAuint32 *caps_mask)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtGetRuntimeCapabilitiesCtx(primary, caps_mask);
}
/* Legacy entry point: enables the debug trap through the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
					HSAuint32 *data_size)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDbgEnableCtx(primary, runtime_info, data_size);
}
/* Legacy entry point: disables the debug trap through the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgDisable(void)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDbgDisableCtx(primary);
}
/* Legacy entry point: fetches the device snapshot through the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetDeviceData(void **data,
					       HSAuint32 *n_entries,
					       HSAuint32 *entry_size)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDbgGetDeviceDataCtx(primary, data, n_entries, entry_size);
}
/* Legacy entry point: fetches the queue snapshot through the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueData(void **data,
					      HSAuint32 *n_entries,
					      HSAuint32 *entry_size,
					      bool suspend_queues)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDbgGetQueueDataCtx(primary, data, n_entries,
					entry_size, suspend_queues);
}
/* Legacy entry point: issues the debug-trap ioctl through the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *args,
					     HSA_QUEUEID *Queues,
					     HSAuint64 *DebugReturn)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDebugTrapIoctlCtx(primary, args, Queues, DebugReturn);
}
@@ -307,7 +307,7 @@ static HSAKMT_STATUS get_mem_info_svm_api(HsaKFDContext *ctx, uint64_t address,
args->attrs[i].value == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
node_id = args->attrs[i].value;
else
hsakmt_gpuid_to_nodeid(args->attrs[i].value, &node_id);
hsakmt_gpuid_to_nodeid(ctx, args->attrs[i].value, &node_id);
switch (args->attrs[i].type) {
case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
pr_err("Preferred location for address 0x%lx is Node id %d\n",
@@ -359,7 +359,7 @@ static void analysis_memory_exception(HsaKFDContext *ctx,
uint32_t node_id = 0;
unsigned int i;
hsakmt_gpuid_to_nodeid(memory_exception_data->gpu_id, &node_id);
hsakmt_gpuid_to_nodeid(ctx, memory_exception_data->gpu_id, &node_id);
pr_err("Memory exception on virtual address 0x%lx, ", addr);
pr_err("node id %d : ", node_id);
if (memory_exception_data->failure.NotPresent)
@@ -468,7 +468,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_ExtCtx(HsaKFDContext *ctx,
if (Events[i]->EventData.EventType == HSA_EVENTTYPE_MEMORY &&
event_data[i].memory_exception_data.gpu_id) {
Events[i]->EventData.EventData.MemoryAccessFault.VirtualAddress = event_data[i].memory_exception_data.va;
result = hsakmt_gpuid_to_nodeid(event_data[i].memory_exception_data.gpu_id, &Events[i]->EventData.EventData.MemoryAccessFault.NodeId);
result = hsakmt_gpuid_to_nodeid(ctx, event_data[i].memory_exception_data.gpu_id, &Events[i]->EventData.EventData.MemoryAccessFault.NodeId);
if (result != HSAKMT_STATUS_SUCCESS)
goto out;
Events[i]->EventData.EventData.MemoryAccessFault.Failure.NotPresent = event_data[i].memory_exception_data.failure.NotPresent;
@@ -483,7 +483,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_ExtCtx(HsaKFDContext *ctx,
} else if (Events[i]->EventData.EventType == HSA_EVENTTYPE_HW_EXCEPTION &&
event_data[i].hw_exception_data.gpu_id) {
result = hsakmt_gpuid_to_nodeid(event_data[i].hw_exception_data.gpu_id, &Events[i]->EventData.EventData.HwException.NodeId);
result = hsakmt_gpuid_to_nodeid(ctx, event_data[i].hw_exception_data.gpu_id, &Events[i]->EventData.EventData.HwException.NodeId);
if (result != HSAKMT_STATUS_SUCCESS)
goto out;
@@ -515,7 +515,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMICtx(HsaKFDContext *ctx, HSAuint32 NodeId, i
pr_debug("[%s] node %d\n", __func__, NodeId);
result = hsakmt_validate_nodeid(NodeId, &gpuid);
result = hsakmt_validate_nodeid(ctx, NodeId, &gpuid);
if (result != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
return result;
File diff soppresso perché troppo grande Carica Diff
@@ -740,6 +740,156 @@ hsaKmtAllocQueueGWSCtx(
HSAuint32 *firstGWS //OUT
);
/**
  Context-aware variant of hsaKmtRuntimeEnable.
  Checks runtime debug support for ctx, then issues the runtime-enable ioctl
  on the KFD file descriptor held by ctx, passing rDebug through and
  requesting TTMP save when setupTtmp is set. On success the capabilities
  mask reported by the ioctl is stored in the debug state of ctx.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRuntimeEnableCtx(
HsaKFDContext *ctx, //IN
void* rDebug, //IN
bool setupTtmp //IN
);
/**
  Context-aware variant of hsaKmtRuntimeDisable.
  Checks runtime debug support for ctx, then issues the runtime-enable ioctl
  with an empty mode mask on the KFD file descriptor held by ctx.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRuntimeDisableCtx(
HsaKFDContext *ctx //IN
);
/**
  Context-aware variant of hsaKmtGetRuntimeCapabilities.
  Returns, through caps_mask, the runtime capabilities mask kept in the debug
  state of ctx (the value stored by hsaKmtRuntimeEnableCtx).
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetRuntimeCapabilitiesCtx(
HsaKFDContext *ctx, //IN
HSAuint32 *caps_mask //OUT
);
/**
Enable debug trap.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgEnableCtx(
HsaKFDContext *ctx, //IN
void **runtime_info, //Out
HSAuint32 *data_size //Out
);
/**
Disable debug trap.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgDisableCtx(
HsaKFDContext *ctx //IN
);
/**
Get device snapshot.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgGetDeviceDataCtx(
HsaKFDContext *ctx, //IN
void **data, //Out
HSAuint32 *n_entries, //Out
HSAuint32 *entry_size //Out
);
/**
Get queues snapshot.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDbgGetQueueDataCtx(
HsaKFDContext *ctx, //IN
void **data, //Out
HSAuint32 *n_entries, //Out
HSAuint32 *entry_size, //Out
bool suspend_queues //In
);
/**
Check whether gpu firmware and kernel support debugging
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtCheckRuntimeDebugSupportCtx(
HsaKFDContext *ctx //IN
);
/**
Debug ops call primarily used for KFD testing
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDebugTrapIoctlCtx(
HsaKFDContext *ctx, //IN
struct kfd_ioctl_dbg_trap_args *args, //IN/OUT
HSA_QUEUEID *Queues, //IN
HSAuint64 *DebugReturn //OUT
);
/**
Gets GPU and CPU clock counters for particular Node
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetClockCountersCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HsaClockCounters *Counters); //OUT
/**
Retrieves information on the available HSA counters
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcGetCounterPropertiesCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HsaCounterProperties** CounterProperties //OUT
);
/**
Registers a set of (HW) counters to be used for tracing/profiling
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcRegisterTraceCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HSAuint32 NumberOfCounters, //IN
HsaCounter* Counters, //IN
HsaPmcTraceRoot* TraceRoot //OUT
);
/**
Allows a user mode process to get exclusive access to the defined set of (HW) counters
used for tracing/profiling
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcAcquireTraceAccessCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HSATraceId TraceId //IN
);
/**
Allows a user mode process to release exclusive access to the defined set of (HW) counters
used for tracing/profiling
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtPmcReleaseTraceAccessCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HSATraceId TraceId //IN
);
/* Helper functions for calling KFD SVM ioctl */
HSAKMT_STATUS
HSAKMTAPI
@@ -282,7 +282,7 @@ void model_init(void)
for (unsigned node_id = 0; node_id < props.NumNodes; node_id++)
{
HsaNodeProperties node_props;
result = hsakmt_topology_get_node_props(node_id, &node_props);
result = hsakmt_topology_get_node_props(&hsakmt_primary_kfd_ctx, node_id, &node_props);
if (result != HSAKMT_STATUS_SUCCESS)
{
fprintf(stderr, "model: Failed to get node %u properties\n", node_id);
@@ -37,9 +37,12 @@ void hsakmt_kfdcontext_init_context(int fd, HsaKFDContext *ctx)
assert(ctx);
ctx->fd = fd;
ctx->topology_context = NULL;
ctx->queue_context = NULL;
ctx->fmm_context = NULL;
ctx->event_context = NULL;
ctx->debug_context = NULL;
ctx->perf_context = NULL;
}
void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx)
@@ -47,6 +50,10 @@ void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx)
if (!ctx)
return;
if (ctx->topology_context) {
free(ctx->topology_context);
ctx->topology_context = NULL;
}
if (ctx->queue_context) {
free(ctx->queue_context);
ctx->queue_context = NULL;
@@ -59,5 +66,13 @@ void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx)
free(ctx->event_context);
ctx->event_context = NULL;
}
if (ctx->debug_context) {
free(ctx->debug_context);
ctx->debug_context = NULL;
}
if (ctx->perf_context) {
free(ctx->perf_context);
ctx->perf_context = NULL;
}
ctx->fd = -1;
}
@@ -28,9 +28,12 @@
#include <stdint.h>
struct hsa_kfd_topology_context;
struct hsa_kfd_queue_context;
struct hsa_kfd_fmm_context;
struct hsa_kfd_event_context;
struct hsa_kfd_debug_context;
struct hsa_kfd_perf_context;
/*
* HsaKFDContext
@@ -52,6 +55,9 @@ typedef struct _HsaKFDContext
/* File descriptor for the KFD device */
int fd;
/* Topology context for managing system topology information */
struct hsa_kfd_topology_context *topology_context;
/* Queue context for managing user queues */
struct hsa_kfd_queue_context *queue_context;
@@ -60,6 +66,12 @@ typedef struct _HsaKFDContext
/* Event context for managing events */
struct hsa_kfd_event_context *event_context;
/* Debug context for managing debug operations */
struct hsa_kfd_debug_context *debug_context;
/* perf context for managing perf operations */
struct hsa_kfd_perf_context *perf_context;
} HsaKFDContext;
// Initialize a pre-allocated HsaKFDContext with the given file descriptor
@@ -67,8 +79,10 @@ void hsakmt_kfdcontext_init_context(int fd, HsaKFDContext *ctx);
// Release all resources associated with the given KFD context
void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx);
struct hsa_kfd_topology_context *hsakmt_kfdcontext_get_topology_context(HsaKFDContext *ctx);
struct hsa_kfd_fmm_context *hsakmt_kfdcontext_get_fmm_context(HsaKFDContext *ctx);
struct hsa_kfd_queue_context *hsakmt_kfdcontext_get_queue_context(HsaKFDContext *ctx);
struct hsa_kfd_event_context *hsakmt_kfdcontext_get_event_context(HsaKFDContext *ctx);
struct hsa_kfd_debug_context *hsakmt_kfdcontext_get_debug_context(HsaKFDContext *ctx);
struct hsa_kfd_perf_context *hsakmt_kfdcontext_get_perf_context(HsaKFDContext *ctx);
#endif /* _KFDCONTEXT_H_ */
@@ -188,23 +188,26 @@ HSAKMT_STATUS hsakmt_init_kfd_version(void);
#define IS_SOC15(gfxv) ((gfxv) >= GFX_VERSION_VEGA10)
HSAKMT_STATUS hsakmt_validate_nodeid(uint32_t nodeid, uint32_t *gpu_id);
HSAKMT_STATUS hsakmt_gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id);
uint32_t hsakmt_get_gfxv_by_node_id(HSAuint32 node_id);
bool hsakmt_prefer_ats(HSAuint32 node_id);
uint16_t hsakmt_get_device_id_by_node_id(HSAuint32 node_id);
uint16_t hsakmt_get_device_id_by_gpu_id(HSAuint32 gpu_id);
uint32_t hsakmt_get_direct_link_cpu(uint32_t gpu_node);
HSAKMT_STATUS hsakmt_validate_nodeid(HsaKFDContext *ctx, uint32_t nodeid, uint32_t *gpu_id);
HSAKMT_STATUS hsakmt_gpuid_to_nodeid(HsaKFDContext *ctx, uint32_t gpu_id, uint32_t* node_id);
uint32_t hsakmt_get_gfxv_by_node_id(HsaKFDContext *ctx, HSAuint32 node_id);
bool hsakmt_prefer_ats(HsaKFDContext *ctx, HSAuint32 node_id);
uint16_t hsakmt_get_device_id_by_node_id(HsaKFDContext *ctx, HSAuint32 node_id);
uint16_t hsakmt_get_device_id_by_gpu_id(HsaKFDContext *ctx, HSAuint32 gpu_id);
uint32_t hsakmt_get_direct_link_cpu(HsaKFDContext *ctx, uint32_t gpu_node);
int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id);
HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
HSAKMT_STATUS hsakmt_validate_nodeid_array(HsaKFDContext *ctx,
uint32_t **gpu_id_array,
uint32_t NumberOfNodes, uint32_t *NodeArray);
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx, HsaSystemProperties *props);
HSAKMT_STATUS hsakmt_topology_get_node_props(HSAuint32 NodeId,
HSAKMT_STATUS hsakmt_topology_get_node_props(HsaKFDContext *ctx,
HSAuint32 NodeId,
HsaNodeProperties *NodeProperties);
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HSAuint32 NodeId,
HSAuint32 NumIoLinks,
HsaIoLinkProperties *IoLinkProperties);
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HsaKFDContext *ctx,
HSAuint32 NodeId,
HSAuint32 NumIoLinks,
HsaIoLinkProperties *IoLinkProperties);
void hsakmt_topology_setup_is_dgpu_param(HsaNodeProperties *props);
bool hsakmt_topology_is_svm_needed(HSA_ENGINE_ID EngineId);
@@ -212,7 +215,7 @@ HSAuint32 hsakmt_PageSizeFromFlags(unsigned int pageSizeFlags);
HSAuint64 MapDrmPerm(HsaMemoryMapFlags flags);
void* hsakmt_allocate_exec_aligned_memory_gpu(HsaKFDContext *ctx,
uint32_t size, uint32_t align,
uint32_t size, uint32_t align,
uint32_t gpu_id,
uint32_t NodeId, bool NonPaged,
bool DeviceLocal, bool Uncached);
@@ -221,11 +224,11 @@ void hsakmt_free_exec_aligned_memory_gpu(HsaKFDContext *ctx,
HSAKMT_STATUS hsakmt_init_process_doorbells(HsaKFDContext *ctx,
unsigned int NumNodes);
void hsakmt_destroy_process_doorbells(HsaKFDContext *ctx);
HSAKMT_STATUS hsakmt_init_device_debugging_memory(unsigned int NumNodes);
void hsakmt_destroy_device_debugging_memory(void);
bool hsakmt_debug_get_reg_status(uint32_t node_id);
HSAKMT_STATUS hsakmt_init_counter_props(unsigned int NumNodes);
void hsakmt_destroy_counter_props(void);
HSAKMT_STATUS hsakmt_init_device_debugging_memory(HsaKFDContext *ctx, unsigned int NumNodes);
void hsakmt_destroy_device_debugging_memory(HsaKFDContext *ctx);
bool hsakmt_debug_get_reg_status(HsaKFDContext *ctx, uint32_t node_id);
HSAKMT_STATUS hsakmt_init_counter_props(HsaKFDContext *ctx, unsigned int NumNodes);
void hsakmt_destroy_counter_props(HsaKFDContext *ctx);
uint32_t *hsakmt_convert_queue_ids(HSAuint32 NumQueues, HSA_QUEUEID *Queues);
extern int hsakmt_ioctl(int fd, unsigned long request, void *arg);
@@ -250,7 +253,7 @@ void hsakmt_clear_events_page(HsaKFDContext *ctx);
void hsakmt_fmm_clear_all_mem(HsaKFDContext *ctx);
void hsakmt_fmm_clear_all_aperture(HsaKFDContext *ctx);
void hsakmt_clear_process_doorbells(HsaKFDContext *ctx);
uint32_t hsakmt_get_num_sysfs_nodes(void);
uint32_t hsakmt_get_num_sysfs_nodes(HsaKFDContext *ctx);
bool hsakmt_is_forked_child(void);
@@ -55,11 +55,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicyCtx(HsaKFDContext *ctx,
pr_debug("[%s] node %d; default %d; alternate %d\n",
__func__, Node, DefaultPolicy, AlternatePolicy);
result = hsakmt_validate_nodeid(Node, &gpu_id);
result = hsakmt_validate_nodeid(ctx, Node, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS)
return result;
if (hsakmt_get_gfxv_by_node_id(Node) != GFX_VERSION_KAVERI)
if (hsakmt_get_gfxv_by_node_id(ctx, Node) != GFX_VERSION_KAVERI)
/* This is a legacy API useful on Kaveri only. On dGPU
* the alternate aperture is setup and used
* automatically for coherent allocations. Don't let
@@ -137,7 +137,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlignCtx(HsaKFDContext *ctx,
pr_debug("[%s] node %d\n", __func__, PreferredNode);
result = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
result = hsakmt_validate_nodeid(ctx, PreferredNode, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, PreferredNode);
return result;
@@ -254,7 +254,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemoryCtx(HsaKFDContext *ctx,
pr_debug("[%s] node %d\n", __func__, Node);
result = hsakmt_validate_nodeid(Node, &args.gpu_id);
result = hsakmt_validate_nodeid(ctx, Node, &args.gpu_id);
if (result != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, Node);
return result;
@@ -304,7 +304,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodesCtx(HsaKFDContext *ctx,
/* TODO: support mixed APU and dGPU configurations */
return HSAKMT_STATUS_NOT_SUPPORTED;
ret = hsakmt_validate_nodeid_array(&gpu_id_array,
ret = hsakmt_validate_nodeid_array(ctx, &gpu_id_array,
NumberOfNodes, NodeArray);
if (ret == HSAKMT_STATUS_SUCCESS) {
@@ -385,7 +385,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExtCtx(HsaKFDContext
pr_debug("[%s] number of nodes %lu\n", __func__, NumberOfNodes);
if (NodeArray != NULL || NumberOfNodes != 0) {
ret = hsakmt_validate_nodeid_array(&gpu_id_array,
ret = hsakmt_validate_nodeid_array(ctx, &gpu_id_array,
NumberOfNodes, NodeArray);
}
@@ -467,7 +467,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodesCtx(HsaKFDContext *ctx,
return HSAKMT_STATUS_INVALID_PARAMETER;
if (NodeArray) {
ret = hsakmt_validate_nodeid_array(&gpu_id_array, NumberOfNodes, NodeArray);
ret = hsakmt_validate_nodeid_array(ctx, &gpu_id_array, NumberOfNodes, NodeArray);
if (ret != HSAKMT_STATUS_SUCCESS)
goto error;
}
@@ -567,7 +567,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodesCtx(HsaKFDContext *ctx,
return hsaKmtMapMemoryToGPUCtx(ctx, MemoryAddress,
MemorySizeInBytes, AlternateVAGPU);
ret = hsakmt_validate_nodeid_array(&gpu_id_array,
ret = hsakmt_validate_nodeid_array(ctx, &gpu_id_array,
NumberOfNodes, NodeArray);
if (ret != HSAKMT_STATUS_SUCCESS)
return ret;
@@ -633,7 +633,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfigCtx(HsaKFDContext *ctx,
pr_debug("[%s] node %d\n", __func__, NodeId);
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
result = hsakmt_validate_nodeid(ctx, NodeId, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS)
return result;
@@ -106,7 +106,7 @@ static void clear_after_fork(HsaKFDContext *ctx)
hsakmt_clear_process_doorbells(ctx);
hsakmt_clear_events_page(ctx);
hsakmt_fmm_clear_all_mem(ctx);
hsakmt_destroy_device_debugging_memory();
hsakmt_destroy_device_debugging_memory(ctx);
int fd = ctx->fd;
if (fd >= 0) {
@@ -226,10 +226,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFDCtx(HsaKFDContext **pCtx)
hsakmt_kfd_open_count = 1;
if (hsakmt_init_device_debugging_memory(sys_props.NumNodes) != HSAKMT_STATUS_SUCCESS)
if (hsakmt_init_device_debugging_memory(&hsakmt_primary_kfd_ctx, sys_props.NumNodes) != HSAKMT_STATUS_SUCCESS)
pr_warn("Insufficient Memory. Debugging unavailable\n");
hsakmt_init_counter_props(sys_props.NumNodes);
hsakmt_init_counter_props(&hsakmt_primary_kfd_ctx, sys_props.NumNodes);
*pCtx = &hsakmt_primary_kfd_ctx;
if (!atfork_installed) {
@@ -269,8 +269,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFDCtx(void)
if (hsakmt_kfd_open_count > 0) {
if (--hsakmt_kfd_open_count == 0) {
hsakmt_destroy_counter_props();
hsakmt_destroy_device_debugging_memory();
hsakmt_destroy_counter_props(&hsakmt_primary_kfd_ctx);
hsakmt_destroy_device_debugging_memory(&hsakmt_primary_kfd_ctx);
hsakmt_fmm_clear_all_aperture(&hsakmt_primary_kfd_ctx);
}
@@ -52,7 +52,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingQueryCapabilities(HSAuint32 NodeId, void
CHECK_KFD_OPEN();
CHECK_KFD_MINOR_VERSION(16);
HSAKMT_STATUS ret = hsakmt_validate_nodeid(NodeId, &gpu_id);
HSAKMT_STATUS ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
if (ret != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
return ret;
@@ -99,7 +99,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingCreate(HSAuint32 NodeId, HsaPcSamplingIn
CHECK_KFD_OPEN();
*traceId = INVALID_TRACE_ID;
HSAKMT_STATUS ret = hsakmt_validate_nodeid(NodeId, &gpu_id);
HSAKMT_STATUS ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
if (ret != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
return ret;
@@ -139,7 +139,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingDestroy(HSAuint32 NodeId, HsaPcSamplingT
CHECK_KFD_OPEN();
HSAKMT_STATUS ret = hsakmt_validate_nodeid(NodeId, &gpu_id);
HSAKMT_STATUS ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
if (ret != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
return ret;
@@ -171,7 +171,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStart(HSAuint32 NodeId, HsaPcSamplingTra
CHECK_KFD_OPEN();
HSAKMT_STATUS ret = hsakmt_validate_nodeid(NodeId, &gpu_id);
HSAKMT_STATUS ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
if (ret != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
return ret;
@@ -210,7 +210,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStop(HSAuint32 NodeId, HsaPcSamplingTrac
CHECK_KFD_OPEN();
HSAKMT_STATUS ret = hsakmt_validate_nodeid(NodeId, &gpu_id);
HSAKMT_STATUS ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
if (ret != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
return ret;
+101 -38
Vedi File
@@ -37,6 +37,7 @@
#include <sys/mman.h>
#include <fcntl.h>
#include <semaphore.h>
#include <assert.h>
#define BITS_PER_BYTE CHAR_BIT
@@ -75,8 +76,32 @@ struct perf_counts_values {
};
};
static HsaCounterProperties **counter_props;
static unsigned int counter_props_count;
/* Per-context performance-counter state, reached through ctx->perf_context. */
struct hsa_kfd_perf_context
{
/* Table of cached counter properties, one slot per node, indexed by node ID.
 * A slot stays NULL until the properties of that node are first queried.
 */
HsaCounterProperties **counter_props;
/* Number of slots allocated in counter_props (the NumNodes passed at init). */
unsigned int counter_props_count;
};
/*
 * Return the performance-counter sub-context of @ctx, creating it on first
 * use.
 *
 * The sub-context is zero-initialised with calloc() and cached in
 * ctx->perf_context, so subsequent calls return the same object.
 *
 * Returns NULL when @ctx is NULL (which also trips the assert when assertions
 * are enabled) or when the allocation fails. Callers must check the result
 * before dereferencing it.
 */
struct hsa_kfd_perf_context *hsakmt_kfdcontext_get_perf_context(HsaKFDContext *ctx)
{
	assert(ctx);
	if (!ctx) {
		/* Newline-terminated like every other log message in this file. */
		pr_err("Expected a non-null ptr for HsaKFDContext\n");
		return NULL;
	}

	if (!ctx->perf_context) {
		ctx->perf_context = calloc(1, sizeof(struct hsa_kfd_perf_context));
		if (!ctx->perf_context)
			pr_err("Alloc memory failed for struct hsa_kfd_perf_context size %zu\n",
			       sizeof(struct hsa_kfd_perf_context));
	}

	/* NULL here means the allocation above failed. */
	return ctx->perf_context;
}
static ssize_t readn(int fd, void *buf, size_t n)
{
@@ -99,33 +124,35 @@ static ssize_t readn(int fd, void *buf, size_t n)
return n;
}
/*
 * Allocate the per-node counter-properties table of @ctx.
 *
 * @ctx:      KFD context whose perf sub-context receives the table.
 * @NumNodes: number of slots to allocate; each slot starts out NULL.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success, or HSAKMT_STATUS_NO_MEMORY when
 * either the perf sub-context or the table itself cannot be allocated. In the
 * failure case profiling is simply unavailable for this context.
 */
HSAKMT_STATUS hsakmt_init_counter_props(HsaKFDContext *ctx, unsigned int NumNodes)
{
	struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);

	/* The getter returns NULL when the sub-context cannot be allocated. */
	if (!perf_ctx) {
		pr_warn("Profiling is not available.\n");
		return HSAKMT_STATUS_NO_MEMORY;
	}

	perf_ctx->counter_props = calloc(NumNodes, sizeof(*perf_ctx->counter_props));
	if (!perf_ctx->counter_props) {
		/* Keep the count consistent with the (absent) table. */
		perf_ctx->counter_props_count = 0;
		pr_warn("Profiling is not available.\n");
		return HSAKMT_STATUS_NO_MEMORY;
	}

	perf_ctx->counter_props_count = NumNodes;

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Release the per-node counter-properties table of @ctx and every cached
 * entry in it.
 *
 * Safe to call when the table was never allocated or was already destroyed:
 * the table pointer and count are reset afterwards, so the
 * "!perf_ctx->counter_props" availability checks elsewhere never see a
 * dangling pointer and a second call is a no-op.
 */
void hsakmt_destroy_counter_props(HsaKFDContext *ctx)
{
	struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);
	unsigned int i;

	/* The getter may return NULL (allocation failure); nothing to free then. */
	if (!perf_ctx || !perf_ctx->counter_props)
		return;

	/* free(NULL) is a no-op, so unused slots need no special casing. */
	for (i = 0; i < perf_ctx->counter_props_count; i++)
		free(perf_ctx->counter_props[i]);

	free(perf_ctx->counter_props);
	perf_ctx->counter_props = NULL;
	perf_ctx->counter_props_count = 0;
}
static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
@@ -211,11 +238,12 @@ static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
return rc;
}
static HSAuint32 get_block_concurrent_limit(uint32_t node_id,
static HSAuint32 get_block_concurrent_limit(struct hsa_kfd_perf_context *perf_ctx,
uint32_t node_id,
HSAuint32 block_id)
{
uint32_t i;
HsaCounterBlockProperties *block = &counter_props[node_id]->Blocks[0];
HsaCounterBlockProperties *block = &perf_ctx->counter_props[node_id]->Blocks[0];
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
if (block->Counters[0].BlockIndex == block_id)
@@ -254,7 +282,8 @@ static HSAKMT_STATUS query_trace(int fd, uint64_t *buf)
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterPropertiesCtx(HsaKFDContext *ctx,
HSAuint32 NodeId,
HsaCounterProperties **CounterProperties)
{
HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
@@ -265,23 +294,24 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
struct perf_counter_block block = {0};
uint32_t total_blocks = 0;
HsaCounterBlockProperties *block_prop;
struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);
if (!counter_props)
if (!perf_ctx->counter_props)
return HSAKMT_STATUS_NO_MEMORY;
if (!CounterProperties)
return HSAKMT_STATUS_INVALID_PARAMETER;
if (hsakmt_validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
return HSAKMT_STATUS_INVALID_NODE_UNIT;
if (counter_props[NodeId]) {
*CounterProperties = counter_props[NodeId];
if (perf_ctx->counter_props[NodeId]) {
*CounterProperties = perf_ctx->counter_props[NodeId];
return HSAKMT_STATUS_SUCCESS;
}
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
rc = hsakmt_get_block_properties(NodeId, i, &block);
rc = hsakmt_get_block_properties(ctx, NodeId, i, &block);
if (rc != HSAKMT_STATUS_SUCCESS)
return rc;
total_concurrent += block.num_of_slots;
@@ -295,19 +325,19 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
sizeof(HsaCounterBlockProperties) * (total_blocks - 1) +
sizeof(HsaCounter) * (total_counters - total_blocks);
counter_props[NodeId] = malloc(counter_props_size);
if (!counter_props[NodeId])
perf_ctx->counter_props[NodeId] = malloc(counter_props_size);
if (!perf_ctx->counter_props[NodeId])
return HSAKMT_STATUS_NO_MEMORY;
counter_props[NodeId]->NumBlocks = total_blocks;
counter_props[NodeId]->NumConcurrent = total_concurrent;
perf_ctx->counter_props[NodeId]->NumBlocks = total_blocks;
perf_ctx->counter_props[NodeId]->NumConcurrent = total_concurrent;
block_prop = &counter_props[NodeId]->Blocks[0];
block_prop = &perf_ctx->counter_props[NodeId]->Blocks[0];
for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
rc = hsakmt_get_block_properties(NodeId, block_id, &block);
rc = hsakmt_get_block_properties(ctx, NodeId, block_id, &block);
if (rc != HSAKMT_STATUS_SUCCESS) {
free(counter_props[NodeId]);
counter_props[NodeId] = NULL;
free(perf_ctx->counter_props[NodeId]);
perf_ctx->counter_props[NodeId] = NULL;
return rc;
}
@@ -329,13 +359,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
block_prop = (HsaCounterBlockProperties *)&block_prop->Counters[block_prop->NumCounters];
}
*CounterProperties = counter_props[NodeId];
*CounterProperties = perf_ctx->counter_props[NodeId];
return HSAKMT_STATUS_SUCCESS;
}
/* Registers a set of (HW) counters to be used for tracing/profiling */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTraceCtx(HsaKFDContext* ctx,
HSAuint32 NodeId,
HSAuint32 NumberOfCounters,
HsaCounter *Counters,
HsaPmcTraceRoot *TraceRoot)
@@ -353,6 +384,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
uint32_t block, num_blocks = 0, total_counters = 0;
uint64_t *counter_id_ptr;
int *fd_ptr;
struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);
pr_debug("[%s] Number of counters %d\n", __func__, NumberOfCounters);
@@ -362,7 +394,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
return HSAKMT_STATUS_NO_MEMORY;
}
if (!counter_props) {
if (!perf_ctx->counter_props) {
pr_err("Profiling is not available, counter_props is NULL.\n");
goto no_memory_exit;
}
@@ -370,7 +402,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
if (!Counters || !TraceRoot || NumberOfCounters == 0)
goto invalid_parameter_exit;
if (hsakmt_validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS) {
if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS) {
free(counter_id);
return HSAKMT_STATUS_INVALID_NODE_UNIT;
}
@@ -408,7 +440,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
if (!num_counters[i])
continue;
concurrent_limit = get_block_concurrent_limit(NodeId, i);
concurrent_limit = get_block_concurrent_limit(perf_ctx, NodeId, i);
if (!concurrent_limit) {
pr_err("Invalid block ID: %d\n", i);
goto invalid_parameter_exit;
@@ -509,7 +541,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
/* Unregisters a set of (HW) counters used for tracing/profiling */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTraceCtx(HsaKFDContext* ctx,
HSAuint32 NodeId,
HSATraceId TraceId)
{
uint32_t gpu_id;
@@ -520,7 +553,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
if (TraceId == 0)
return HSAKMT_STATUS_INVALID_PARAMETER;
if (hsakmt_validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
return HSAKMT_STATUS_INVALID_NODE_UNIT;
trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
@@ -544,7 +577,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccessCtx(HsaKFDContext* ctx,
HSAuint32 NodeId,
HSATraceId TraceId)
{
struct perf_trace *trace;
@@ -561,7 +595,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
return HSAKMT_STATUS_INVALID_HANDLE;
if (hsakmt_validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
return HSAKMT_STATUS_INVALID_NODE_UNIT;
return ret;
@@ -692,3 +726,32 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStopTrace(HSATraceId TraceId)
return ret;
}
/*
 * Context-less entry point: forwards to hsaKmtPmcGetCounterPropertiesCtx()
 * on the primary KFD context and returns its status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
						      HsaCounterProperties **CounterProperties)
{
	HsaKFDContext *primary_ctx = &hsakmt_primary_kfd_ctx;

	return hsaKmtPmcGetCounterPropertiesCtx(primary_ctx, NodeId,
						 CounterProperties);
}
/*
 * Context-less entry point: forwards to hsaKmtPmcRegisterTraceCtx() on the
 * primary KFD context and returns its status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
					       HSAuint32 NumberOfCounters,
					       HsaCounter *Counters,
					       HsaPmcTraceRoot *TraceRoot)
{
	HsaKFDContext *primary_ctx = &hsakmt_primary_kfd_ctx;

	return hsaKmtPmcRegisterTraceCtx(primary_ctx, NodeId,
					 NumberOfCounters, Counters, TraceRoot);
}
/*
 * Context-less entry point: forwards to hsaKmtPmcUnregisterTraceCtx() on the
 * primary KFD context and returns its status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
						 HSATraceId TraceId)
{
	HsaKFDContext *primary_ctx = &hsakmt_primary_kfd_ctx;

	return hsaKmtPmcUnregisterTraceCtx(primary_ctx, NodeId, TraceId);
}
/*
 * Context-less entry point: forwards to hsaKmtPmcAcquireTraceAccessCtx() on
 * the primary KFD context and returns its status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
						    HSATraceId TraceId)
{
	HsaKFDContext *primary_ctx = &hsakmt_primary_kfd_ctx;

	return hsaKmtPmcAcquireTraceAccessCtx(primary_ctx, NodeId, TraceId);
}
@@ -1958,12 +1958,13 @@ static struct perf_counter_block navi_blocks[PERFCOUNTER_BLOCKID__MAX] = {
},
};
HSAKMT_STATUS hsakmt_get_block_properties(uint32_t node_id,
HSAKMT_STATUS hsakmt_get_block_properties(HsaKFDContext *ctx,
uint32_t node_id,
enum perf_block_id block_id,
struct perf_counter_block *block)
{
uint32_t gfxv = hsakmt_get_gfxv_by_node_id(node_id);
uint16_t dev_id = hsakmt_get_device_id_by_node_id(node_id);
uint32_t gfxv = hsakmt_get_gfxv_by_node_id(ctx, node_id);
uint16_t dev_id = hsakmt_get_device_id_by_node_id(ctx, node_id);
if (block_id >= PERFCOUNTER_BLOCKID__MAX ||
block_id < PERFCOUNTER_BLOCKID__FIRST)
@@ -67,7 +67,8 @@ struct perf_counter_block {
uint64_t counter_mask;
};
HSAKMT_STATUS hsakmt_get_block_properties(uint32_t node_id,
HSAKMT_STATUS hsakmt_get_block_properties(HsaKFDContext *ctx,
uint32_t node_id,
enum perf_block_id block_id,
struct perf_counter_block *block);
+12 -10
Vedi File
@@ -148,14 +148,15 @@ HSAKMT_STATUS hsakmt_init_process_doorbells(HsaKFDContext *ctx, unsigned int Num
return ret;
}
static void get_doorbell_map_info(uint32_t node_id,
static void get_doorbell_map_info(HsaKFDContext *ctx,
uint32_t node_id,
struct process_doorbells *doorbell)
{
/*
* GPUVM doorbell on Tonga requires a workaround for VM TLB ACTIVE bit
* lookup bug. Remove ASIC check when this is implemented in amdgpu.
*/
uint32_t gfxv = hsakmt_get_gfxv_by_node_id(node_id);
uint32_t gfxv = hsakmt_get_gfxv_by_node_id(ctx, node_id);
doorbell->use_gpuvm = (hsakmt_is_dgpu && gfxv != GFX_VERSION_TONGA);
doorbell->size = DOORBELLS_PAGE_SIZE(DOORBELL_SIZE(gfxv));
@@ -272,7 +273,7 @@ static HSAKMT_STATUS map_doorbell(HsaKFDContext *ctx,
return HSAKMT_STATUS_SUCCESS;
}
get_doorbell_map_info(NodeId, &doorbells[NodeId]);
get_doorbell_map_info(ctx, NodeId, &doorbells[NodeId]);
if (doorbells[NodeId].use_gpuvm) {
status = map_doorbell_dgpu(ctx, NodeId, gpu_id, doorbell_mmap_offset);
@@ -385,7 +386,7 @@ void *hsakmt_allocate_exec_aligned_memory_gpu(HsaKFDContext *ctx,
* nonPaged=0 system memory allocation uses GTT path
*/
if (!nonPaged) {
cpu_id = hsakmt_get_direct_link_cpu(NodeId);
cpu_id = hsakmt_get_direct_link_cpu(ctx, NodeId);
if (cpu_id == INVALID_NODEID) {
flags.ui32.NoNUMABind = 1;
cpu_id = 0;
@@ -460,7 +461,8 @@ static void free_exec_aligned_memory(HsaKFDContext *ctx,
munmap(addr, size);
}
static HSAKMT_STATUS register_svm_range(void *mem, uint32_t size,
static HSAKMT_STATUS register_svm_range(HsaKFDContext *ctx,
void *mem, uint32_t size,
uint32_t gpuNode, uint32_t prefetchNode,
uint32_t preferredNode, bool alwaysMapped)
{
@@ -493,7 +495,7 @@ static HSAKMT_STATUS register_svm_range(void *mem, uint32_t size,
attrs[5].type = HSA_SVM_ATTR_GRANULARITY;
attrs[5].value = 0xFF;
return hsaKmtSVMSetAttr(mem, size, nattr, attrs);
return hsaKmtSVMSetAttrCtx(ctx, mem, size, nattr, attrs);
}
static void free_queue(HsaKFDContext *ctx, struct queue *q)
@@ -599,7 +601,7 @@ static int handle_concrete_asic(HsaKFDContext *ctx,
fill_cwsr_header(q, addr, Event, ErrPayload, node.NumXcc);
HSAKMT_STATUS r = register_svm_range(addr, size,
HSAKMT_STATUS r = register_svm_range(ctx, addr, size,
NodeId, NodeId, 0, true);
if (r == HSAKMT_STATUS_SUCCESS) {
@@ -680,7 +682,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExtCtx(HsaKFDContext *ctx,
Priority > HSA_QUEUE_PRIORITY_MAXIMUM)
return HSAKMT_STATUS_INVALID_PARAMETER;
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
result = hsakmt_validate_nodeid(ctx, NodeId, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS)
return result;
@@ -691,7 +693,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExtCtx(HsaKFDContext *ctx,
memset(q, 0, sizeof(*q));
q->gfxv = hsakmt_get_gfxv_by_node_id(NodeId);
q->gfxv = hsakmt_get_gfxv_by_node_id(ctx, NodeId);
q->use_ats = false;
if (q->gfxv == GFX_VERSION_TONGA)
@@ -932,7 +934,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandlerCtx(HsaKFDContext *ctx,
CHECK_KFD_OPEN();
result = hsakmt_validate_nodeid(Node, &gpu_id);
result = hsakmt_validate_nodeid(ctx, Node, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS)
return result;
+3 -3
Vedi File
@@ -35,7 +35,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMAcquire(HSAuint32 PreferredNode)
struct kfd_ioctl_spm_args args = {0};
uint32_t gpu_id;
ret = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, PreferredNode, &gpu_id);
if (ret != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, PreferredNode);
return ret;
@@ -61,7 +61,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMSetDestBuffer(HSAuint32 PreferredNode,
struct kfd_ioctl_spm_args args = {0};
uint32_t gpu_id = 0;
ret = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, PreferredNode, &gpu_id);
if (ret != HSAKMT_STATUS_SUCCESS) {
return ret;
}
@@ -87,7 +87,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMRelease(HSAuint32 PreferredNode)
struct kfd_ioctl_spm_args args = {0};
uint32_t gpu_id;
ret = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, PreferredNode, &gpu_id);
if (ret != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, PreferredNode);
return ret;
+3 -3
Vedi File
@@ -81,7 +81,7 @@ hsaKmtSVMSetAttrCtx(HsaKFDContext *ctx,
continue;
}
r = hsakmt_validate_nodeid(attrs[i].value, &args->attrs[i].value);
r = hsakmt_validate_nodeid(ctx, attrs[i].value, &args->attrs[i].value);
if (r != HSAKMT_STATUS_SUCCESS) {
pr_debug("invalid node ID: %d\n", attrs[i].value);
return r;
@@ -141,7 +141,7 @@ hsaKmtSVMGetAttrCtx(HsaKFDContext *ctx,
attrs[i].type != KFD_IOCTL_SVM_ATTR_NO_ACCESS)
continue;
r = hsakmt_validate_nodeid(attrs[i].value, &args->attrs[i].value);
r = hsakmt_validate_nodeid(ctx, attrs[i].value, &args->attrs[i].value);
if (r != HSAKMT_STATUS_SUCCESS) {
pr_debug("invalid node ID: %d\n", attrs[i].value);
return r;
@@ -176,7 +176,7 @@ hsaKmtSVMGetAttrCtx(HsaKFDContext *ctx,
attrs[i].value = INVALID_NODEID;
break;
default:
r = hsakmt_gpuid_to_nodeid(attrs[i].value, &attrs[i].value);
r = hsakmt_gpuid_to_nodeid(ctx, attrs[i].value, &attrs[i].value);
if (r != HSAKMT_STATUS_SUCCESS) {
pr_debug("invalid GPU ID: %d\n",
attrs[i].value);
+10 -3
Vedi File
@@ -26,7 +26,8 @@
#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCountersCtx(HsaKFDContext *ctx,
HSAuint32 NodeId,
HsaClockCounters *Counters)
{
HSAKMT_STATUS result;
@@ -36,13 +37,13 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
CHECK_KFD_OPEN();
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
result = hsakmt_validate_nodeid(ctx, NodeId, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS)
return result;
args.gpu_id = gpu_id;
err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
if (err < 0) {
result = HSAKMT_STATUS_ERROR;
} else {
@@ -55,3 +56,9 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
return result;
}
/*
 * Context-less entry point: forwards to hsaKmtGetClockCountersCtx() on the
 * primary KFD context and returns its status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
					       HsaClockCounters *Counters)
{
	HsaKFDContext *primary_ctx = &hsakmt_primary_kfd_ctx;

	return hsaKmtGetClockCountersCtx(primary_ctx, NodeId, Counters);
}
+199 -129
Vedi File
@@ -72,16 +72,6 @@ typedef struct {
HsaIoLinkProperties *link;
} node_props_t;
static HsaSystemProperties *g_system;
static node_props_t *g_props;
/* This array caches sysfs based node IDs of CPU nodes + all supported GPU nodes.
* It will be used to map user-node IDs to sysfs-node IDs.
*/
static uint32_t *map_user_to_sysfs_node_id;
static uint32_t map_user_to_sysfs_node_id_size;
static uint32_t num_sysfs_nodes;
static int processor_vendor = -1;
/* Supported System Vendors */
enum SUPPORTED_PROCESSOR_VENDORS {
@@ -96,8 +86,45 @@ static const char *supported_processor_vendor_name[] = {
"\n" // POWER requires a different search method
};
/*
 * KFD Topology Context
 *
 * Per-context topology state, reached through ctx->topology_context.
 */
struct hsa_kfd_topology_context
{
/* Snapshot of the system properties; NULL while no snapshot is held. */
HsaSystemProperties* system_props;
/* Per-node properties of the snapshot, indexed by user node ID;
 * NULL while no snapshot is held.
 */
node_props_t *node_props;
/* This array caches sysfs based node IDs of CPU nodes + all supported GPU nodes.
 * It will be used to map user-node IDs to sysfs-node IDs.
 */
uint32_t *map_user_to_sysfs_node_id;
/* Number of entries allocated in map_user_to_sysfs_node_id. */
uint32_t map_user_to_sysfs_node_id_size;
/* Number of node sub-directories found under the sysfs topology directory. */
uint32_t num_sysfs_nodes;
};
/*
 * Return the topology sub-context of @ctx, creating it on first use.
 *
 * The sub-context is zero-initialised with calloc() and cached in
 * ctx->topology_context, so subsequent calls return the same object.
 *
 * Returns NULL when @ctx is NULL (which also trips the assert when assertions
 * are enabled) or when the allocation fails. Callers must check the result
 * before dereferencing it.
 */
struct hsa_kfd_topology_context *hsakmt_kfdcontext_get_topology_context(HsaKFDContext *ctx)
{
	assert(ctx);
	if (!ctx) {
		/* Newline-terminated like every other log message in this file. */
		pr_err("Expected a non-null ptr for HsaKFDContext\n");
		return NULL;
	}

	if (!ctx->topology_context) {
		ctx->topology_context = calloc(1, sizeof(struct hsa_kfd_topology_context));
		if (!ctx->topology_context)
			pr_err("Alloc memory failed for struct hsa_kfd_topology_context size %zu\n",
			       sizeof(struct hsa_kfd_topology_context));
	}

	/* NULL here means the allocation above failed. */
	return ctx->topology_context;
}
static HSAKMT_STATUS topology_take_snapshot(HsaKFDContext *ctx);
static void topology_drop_snapshot(void);
static void topology_drop_snapshot(HsaKFDContext *ctx);
static const struct hsa_gfxip_table gfxip_lookup_table[] = {
/* Kaveri Family */
@@ -610,12 +637,15 @@ err:
return ret;
}
static HSAKMT_STATUS topology_sysfs_map_node_id(uint32_t node_id, uint32_t *sys_node_id)
static HSAKMT_STATUS topology_sysfs_map_node_id(
struct hsa_kfd_topology_context *topology_ctx,
uint32_t node_id, uint32_t *sys_node_id)
{
if ((!map_user_to_sysfs_node_id) || (node_id >= map_user_to_sysfs_node_id_size))
if ((!topology_ctx->map_user_to_sysfs_node_id) ||
(node_id >= topology_ctx->map_user_to_sysfs_node_id_size))
return HSAKMT_STATUS_NOT_SUPPORTED;
*sys_node_id = map_user_to_sysfs_node_id[node_id];
*sys_node_id = topology_ctx->map_user_to_sysfs_node_id[node_id];
return HSAKMT_STATUS_SUCCESS;
}
@@ -737,6 +767,7 @@ HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx,
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
bool is_node_supported = true;
uint32_t num_supported_nodes = 0;
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
assert(props);
snprintf(path, sizeof(path), KFD_SYSFS_PATH_SYSTEM_PROPERTIES, get_topology_dir());
@@ -779,34 +810,34 @@ HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx,
* which represent the node numbers
*/
snprintf(path, sizeof(path), KFD_SYSFS_PATH_NODES, get_topology_dir());
num_sysfs_nodes = num_subdirs(path, "");
topology_ctx->num_sysfs_nodes = num_subdirs(path, "");
if (map_user_to_sysfs_node_id == NULL) {
if (topology_ctx->map_user_to_sysfs_node_id == NULL) {
/* Trade off - num_sysfs_nodes includes all CPU and GPU nodes.
* Slightly more memory is allocated than necessary.
*/
map_user_to_sysfs_node_id = calloc(num_sysfs_nodes, sizeof(uint32_t));
if (map_user_to_sysfs_node_id == NULL) {
topology_ctx->map_user_to_sysfs_node_id = calloc(topology_ctx->num_sysfs_nodes, sizeof(uint32_t));
if (topology_ctx->map_user_to_sysfs_node_id == NULL) {
ret = HSAKMT_STATUS_NO_MEMORY;
goto err2;
}
map_user_to_sysfs_node_id_size = num_sysfs_nodes;
} else if (num_sysfs_nodes > map_user_to_sysfs_node_id_size) {
free(map_user_to_sysfs_node_id);
map_user_to_sysfs_node_id = calloc(num_sysfs_nodes, sizeof(uint32_t));
if (map_user_to_sysfs_node_id == NULL) {
topology_ctx->map_user_to_sysfs_node_id_size = topology_ctx->num_sysfs_nodes;
} else if (topology_ctx->num_sysfs_nodes > topology_ctx->map_user_to_sysfs_node_id_size) {
free(topology_ctx->map_user_to_sysfs_node_id);
topology_ctx->map_user_to_sysfs_node_id = calloc(topology_ctx->num_sysfs_nodes, sizeof(uint32_t));
if (topology_ctx->map_user_to_sysfs_node_id == NULL) {
ret = HSAKMT_STATUS_NO_MEMORY;
goto err2;
}
map_user_to_sysfs_node_id_size = num_sysfs_nodes;
topology_ctx->map_user_to_sysfs_node_id_size = topology_ctx->num_sysfs_nodes;
}
for (uint32_t i = 0; i < num_sysfs_nodes; i++) {
for (uint32_t i = 0; i < topology_ctx->num_sysfs_nodes; i++) {
ret = topology_sysfs_check_node_supported(ctx, i, &is_node_supported);
if (ret != HSAKMT_STATUS_SUCCESS)
goto sysfs_parse_failed;
if (is_node_supported)
map_user_to_sysfs_node_id[num_supported_nodes++] = i;
topology_ctx->map_user_to_sysfs_node_id[num_supported_nodes++] = i;
}
props->NumNodes = num_supported_nodes;
@@ -815,8 +846,8 @@ HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx,
return ret;
sysfs_parse_failed:
free(map_user_to_sysfs_node_id);
map_user_to_sysfs_node_id = NULL;
free(topology_ctx->map_user_to_sysfs_node_id);
topology_ctx->map_user_to_sysfs_node_id = NULL;
err2:
free(read_buf);
err1:
@@ -1077,7 +1108,8 @@ err_device_initialize:
return ret;
}
static HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id,
static HSAKMT_STATUS topology_sysfs_get_node_props(HsaKFDContext *ctx,
uint32_t node_id,
HsaNodeProperties *props,
bool *p2p_links,
uint32_t *num_p2pLinks)
@@ -1097,9 +1129,9 @@ static HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id,
uint32_t simd_arrays_count = 0;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
assert(props);
ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
ret = topology_sysfs_map_node_id(topology_ctx, node_id, &sys_node_id);
if (ret != HSAKMT_STATUS_SUCCESS)
return ret;
@@ -1307,7 +1339,9 @@ out:
return ret;
}
static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id,
static HSAKMT_STATUS topology_sysfs_get_mem_props(
struct hsa_kfd_topology_context *topology_ctx,
uint32_t node_id,
uint32_t mem_id,
HsaMemoryProperties *props)
{
@@ -1322,7 +1356,7 @@ static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id,
uint32_t sys_node_id;
assert(props);
ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
ret = topology_sysfs_map_node_id(topology_ctx, node_id, &sys_node_id);
if (ret != HSAKMT_STATUS_SUCCESS)
return ret;
@@ -1541,7 +1575,9 @@ exit:
return ret;
}
static HSAKMT_STATUS topology_sysfs_get_cache_props(uint32_t node_id,
static HSAKMT_STATUS topology_sysfs_get_cache_props(
struct hsa_kfd_topology_context *topology_ctx,
uint32_t node_id,
uint32_t cache_id,
HsaCacheProperties *props)
{
@@ -1556,7 +1592,7 @@ static HSAKMT_STATUS topology_sysfs_get_cache_props(uint32_t node_id,
uint32_t sys_node_id;
assert(props);
ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
ret = topology_sysfs_map_node_id(topology_ctx, node_id, &sys_node_id);
if (ret != HSAKMT_STATUS_SUCCESS)
return ret;
@@ -1619,12 +1655,13 @@ err1:
return ret;
}
static HSAKMT_STATUS topology_map_sysfs_to_user_node_id(uint32_t sys_node_id, uint32_t *user_node_id)
static HSAKMT_STATUS topology_map_sysfs_to_user_node_id(struct hsa_kfd_topology_context *topology_ctx,
uint32_t sys_node_id, uint32_t *user_node_id)
{
uint32_t node_id;
for (node_id = 0; node_id < map_user_to_sysfs_node_id_size; node_id++)
if (map_user_to_sysfs_node_id[node_id] == sys_node_id) {
for (node_id = 0; node_id < topology_ctx->map_user_to_sysfs_node_id_size; node_id++)
if (topology_ctx->map_user_to_sysfs_node_id[node_id] == sys_node_id) {
*user_node_id = node_id;
return HSAKMT_STATUS_SUCCESS;
}
@@ -1652,9 +1689,10 @@ static HSAKMT_STATUS topology_sysfs_get_iolink_props(HsaKFDContext *ctx,
int read_size;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
uint32_t sys_node_id;
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
assert(props);
ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
ret = topology_sysfs_map_node_id(topology_ctx, node_id, &sys_node_id);
if (ret != HSAKMT_STATUS_SUCCESS)
return ret;
@@ -1707,7 +1745,7 @@ static HSAKMT_STATUS topology_sysfs_get_iolink_props(HsaKFDContext *ctx,
memset(props, 0, sizeof(*props));
goto err2;
}
ret = topology_map_sysfs_to_user_node_id(sysfs_node_id, &props->NodeTo);
ret = topology_map_sysfs_to_user_node_id(topology_ctx, sysfs_node_id, &props->NodeTo);
if (ret != HSAKMT_STATUS_SUCCESS)
goto err2;
} else if (strcmp(prop_name, "weight") == 0)
@@ -1974,6 +2012,7 @@ HSAKMT_STATUS topology_take_snapshot(HsaKFDContext *ctx)
uint32_t num_ioLinks;
bool p2p_links = false;
uint32_t num_p2pLinks = 0;
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
cpuinfo = calloc(num_procs, sizeof(struct proc_cpuinfo));
if (!cpuinfo) {
@@ -1996,7 +2035,7 @@ retry:
goto err;
}
for (i = 0; i < sys_props.NumNodes; i++) {
ret = topology_sysfs_get_node_props(i,
ret = topology_sysfs_get_node_props(ctx, i,
&temp_props[i].node,
&p2p_links, &num_p2pLinks);
if (ret != HSAKMT_STATUS_SUCCESS) {
@@ -2016,7 +2055,7 @@ retry:
goto err;
}
for (mem_id = 0; mem_id < temp_props[i].node.NumMemoryBanks; mem_id++) {
ret = topology_sysfs_get_mem_props(i, mem_id, &temp_props[i].mem[mem_id]);
ret = topology_sysfs_get_mem_props(topology_ctx, i, mem_id, &temp_props[i].mem[mem_id]);
if (ret != HSAKMT_STATUS_SUCCESS) {
free_properties(temp_props, i + 1);
goto err;
@@ -2032,7 +2071,8 @@ retry:
goto err;
}
for (cache_id = 0; cache_id < temp_props[i].node.NumCaches; cache_id++) {
ret = topology_sysfs_get_cache_props(i, cache_id, &temp_props[i].cache[cache_id]);
ret = topology_sysfs_get_cache_props(topology_ctx,
i, cache_id, &temp_props[i].cache[cache_id]);
if (ret != HSAKMT_STATUS_SUCCESS) {
free_properties(temp_props, i + 1);
goto err;
@@ -2122,62 +2162,72 @@ retry:
goto retry;
}
if (!g_system) {
g_system = malloc(sizeof(HsaSystemProperties));
if (!g_system) {
if (!topology_ctx->system_props) {
topology_ctx->system_props = malloc(sizeof(HsaSystemProperties));
if (!topology_ctx->system_props) {
free_properties(temp_props, sys_props.NumNodes);
ret = HSAKMT_STATUS_NO_MEMORY;
goto err;
}
}
*g_system = sys_props;
if (g_props)
free(g_props);
g_props = temp_props;
*topology_ctx->system_props = sys_props;
if (topology_ctx->node_props)
free(topology_ctx->node_props);
topology_ctx->node_props = temp_props;
err:
free(cpuinfo);
return ret;
}
/*
 * Drop the snapshot of the HSA topology information held by @ctx.
 * Assume lock is held.
 *
 * Frees the per-node properties, the system properties and the
 * user->sysfs node-ID map, and resets the corresponding fields so the
 * context can take a fresh snapshot later. Does nothing when the topology
 * sub-context cannot be obtained.
 */
void topology_drop_snapshot(HsaKFDContext *ctx)
{
	struct hsa_kfd_topology_context *topology_ctx =
		hsakmt_kfdcontext_get_topology_context(ctx);

	/* The getter returns NULL on allocation failure; nothing to drop then. */
	if (!topology_ctx)
		return;

	if (!!topology_ctx->system_props != !!topology_ctx->node_props)
		pr_warn("Probably inconsistency?\n");

	if (topology_ctx->node_props) {
		/*
		 * In the inconsistent case warned about above system_props is
		 * NULL, so the node count is unknown; fall back to 0 rather than
		 * dereferencing NULL.
		 * NOTE(review): assumes free_properties() accepts a count of 0
		 * and still releases the array itself - confirm.
		 */
		uint32_t num_nodes = topology_ctx->system_props ?
				     topology_ctx->system_props->NumNodes : 0;

		/* Remove state */
		free_properties(topology_ctx->node_props, num_nodes);
		topology_ctx->node_props = NULL;
	}

	free(topology_ctx->system_props);
	topology_ctx->system_props = NULL;

	/* free(NULL) is a no-op, so the map needs no separate NULL test. */
	free(topology_ctx->map_user_to_sysfs_node_id);
	topology_ctx->map_user_to_sysfs_node_id = NULL;
	topology_ctx->map_user_to_sysfs_node_id_size = 0;
}
/*
 * Check that @nodeid names a node in the topology snapshot of @ctx and
 * optionally look up its KFD GPU ID.
 *
 * @ctx:    KFD context whose topology snapshot is consulted.
 * @nodeid: user node ID to validate.
 * @gpu_id: if non-NULL, receives the node's KFDGpuID on success.
 *
 * Returns HSAKMT_STATUS_SUCCESS when the node exists, or
 * HSAKMT_STATUS_INVALID_NODE_UNIT when the topology sub-context is
 * unavailable, no snapshot has been taken, or @nodeid is out of range.
 */
HSAKMT_STATUS hsakmt_validate_nodeid(HsaKFDContext *ctx, uint32_t nodeid, uint32_t *gpu_id)
{
	struct hsa_kfd_topology_context *topology_ctx =
		hsakmt_kfdcontext_get_topology_context(ctx);

	/* The getter returns NULL on allocation failure: no topology, no nodes. */
	if (!topology_ctx)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	if (!topology_ctx->node_props || !topology_ctx->system_props ||
	    topology_ctx->system_props->NumNodes <= nodeid)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	if (gpu_id)
		*gpu_id = topology_ctx->node_props[nodeid].node.KFDGpuID;

	return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS hsakmt_gpuid_to_nodeid(uint32_t gpu_id, uint32_t *node_id)
HSAKMT_STATUS hsakmt_gpuid_to_nodeid(HsaKFDContext *ctx, uint32_t gpu_id, uint32_t *node_id)
{
uint64_t node_idx;
for (node_idx = 0; node_idx < g_system->NumNodes; node_idx++) {
if (g_props[node_idx].node.KFDGpuID == gpu_id) {
struct hsa_kfd_topology_context *topology_ctx =
hsakmt_kfdcontext_get_topology_context(ctx);
for (node_idx = 0; node_idx < topology_ctx->system_props->NumNodes; node_idx++) {
if (topology_ctx->node_props[node_idx].node.KFDGpuID == gpu_id) {
*node_id = node_idx;
return HSAKMT_STATUS_SUCCESS;
}
@@ -2193,6 +2243,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemPropertiesCtx(HsaKFDContext *ctx,
HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS;
CHECK_KFD_OPEN();
struct hsa_kfd_topology_context *topology_ctx =
hsakmt_kfdcontext_get_topology_context(ctx);
if (!SystemProperties)
return HSAKMT_STATUS_INVALID_PARAMETER;
@@ -2202,8 +2254,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemPropertiesCtx(HsaKFDContext *ctx,
/* We already have a valid snapshot. Avoid double initialization that
* would leak memory.
*/
if (g_system) {
*SystemProperties = *g_system;
if (topology_ctx->system_props) {
*SystemProperties = *topology_ctx->system_props;
goto out;
}
@@ -2211,23 +2263,23 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemPropertiesCtx(HsaKFDContext *ctx,
if (err != HSAKMT_STATUS_SUCCESS)
goto out;
assert(g_system);
assert(topology_ctx->system_props);
if (hsakmt_use_model)
model_init();
err = hsakmt_fmm_init_process_apertures(ctx, g_system->NumNodes);
err = hsakmt_fmm_init_process_apertures(ctx, topology_ctx->system_props->NumNodes);
if (err != HSAKMT_STATUS_SUCCESS)
goto init_process_apertures_failed;
err = hsakmt_init_process_doorbells(ctx, g_system->NumNodes);
err = hsakmt_init_process_doorbells(ctx, topology_ctx->system_props->NumNodes);
if (err != HSAKMT_STATUS_SUCCESS)
goto init_doorbells_failed;
*SystemProperties = *g_system;
*SystemProperties = *topology_ctx->system_props;
for (int node = 0; node < g_system->NumNodes; node++) {
if (hsakmt_get_gfxv_by_node_id(node) == GFX_VERSION_GFX1151 &&
for (int node = 0; node < topology_ctx->system_props->NumNodes; node++) {
if (hsakmt_get_gfxv_by_node_id(ctx, node) == GFX_VERSION_GFX1151 &&
hsakmt_kfd_version_info.KernelInterfaceMajorVersion == 1 &&
hsakmt_kfd_version_info.KernelInterfaceMinorVersion < 20)
pr_err_once("WARNING: KFD ABI 1.20+ is recommended for gfx1151. Current KFD ABI is %i.%i. This may result in faults, crashes and other application instability\n", hsakmt_kfd_version_info.KernelInterfaceMajorVersion, hsakmt_kfd_version_info.KernelInterfaceMinorVersion);
@@ -2238,7 +2290,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemPropertiesCtx(HsaKFDContext *ctx,
init_doorbells_failed:
hsakmt_fmm_destroy_process_apertures(ctx);
init_process_apertures_failed:
topology_drop_snapshot();
topology_drop_snapshot(ctx);
out:
pthread_mutex_unlock(&hsakmt_mutex);
@@ -2251,20 +2303,24 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemPropertiesCtx(HsaKFDContext *ctx)
hsakmt_destroy_process_doorbells(ctx);
hsakmt_fmm_destroy_process_apertures(ctx);
topology_drop_snapshot();
topology_drop_snapshot(ctx);
pthread_mutex_unlock(&hsakmt_mutex);
return HSAKMT_STATUS_SUCCESS;
}
/*
 * Copies the cached HsaNodeProperties of node NodeId into *NodeProperties.
 *
 * The g_* lines are the pre-refactor globals; the topology_ctx-> lines are
 * their per-context replacements, looked up from ctx.
 *
 * Returns HSAKMT_STATUS_ERROR when no snapshot is present (either pointer
 * is NULL) or NodeId >= NumNodes; HSAKMT_STATUS_SUCCESS after the copy.
 *
 * NOTE(review): NodeProperties is dereferenced without a NULL check here --
 * presumably callers validate it; confirm.
 */
HSAKMT_STATUS hsakmt_topology_get_node_props(HSAuint32 NodeId,
HSAKMT_STATUS hsakmt_topology_get_node_props(HsaKFDContext *ctx,
HSAuint32 NodeId,
HsaNodeProperties *NodeProperties)
{
if (!g_system || !g_props || NodeId >= g_system->NumNodes)
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
if (!topology_ctx->system_props || !topology_ctx->node_props ||
NodeId >= topology_ctx->system_props->NumNodes)
return HSAKMT_STATUS_ERROR;
/* Struct assignment: the caller gets its own copy of the node record. */
*NodeProperties = g_props[NodeId].node;
*NodeProperties = topology_ctx->node_props[NodeId].node;
return HSAKMT_STATUS_SUCCESS;
}
@@ -2282,11 +2338,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodePropertiesCtx(HsaKFDContext *ctx,
CHECK_KFD_OPEN();
pthread_mutex_lock(&hsakmt_mutex);
err = hsakmt_validate_nodeid(NodeId, &gpu_id);
err = hsakmt_validate_nodeid(ctx, NodeId, &gpu_id);
if (err != HSAKMT_STATUS_SUCCESS)
goto out;
err = hsakmt_topology_get_node_props(NodeId, NodeProperties);
err = hsakmt_topology_get_node_props(ctx, NodeId, NodeProperties);
if (err != HSAKMT_STATUS_SUCCESS)
goto out;
/* For a CPU-only node, don't add any additional GPU memory banks. */
@@ -2314,6 +2370,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS;
uint32_t i, gpu_id;
HSAuint64 aperture_limit;
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
node_props_t *node_props = topology_ctx->node_props;
if (!MemoryProperties)
return HSAKMT_STATUS_INVALID_PARAMETER;
@@ -2321,15 +2379,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
CHECK_KFD_OPEN();
pthread_mutex_lock(&hsakmt_mutex);
err = hsakmt_validate_nodeid(NodeId, &gpu_id);
err = hsakmt_validate_nodeid(ctx, NodeId, &gpu_id);
if (err != HSAKMT_STATUS_SUCCESS)
goto out;
memset(MemoryProperties, 0, NumBanks * sizeof(HsaMemoryProperties));
for (i = 0; i < MIN(g_props[NodeId].node.NumMemoryBanks, NumBanks); i++) {
assert(g_props[NodeId].mem);
MemoryProperties[i] = g_props[NodeId].mem[i];
for (i = 0; i < MIN(node_props[NodeId].node.NumMemoryBanks, NumBanks); i++) {
assert(node_props[NodeId].mem);
MemoryProperties[i] = node_props[NodeId].mem[i];
}
/* The following memory banks do not apply to a CPU-only node */
@@ -2341,7 +2399,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_LDS, gpu_id,
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_LDS;
MemoryProperties[i].SizeInBytes = g_props[NodeId].node.LDSSizeInKB * 1024;
MemoryProperties[i].SizeInBytes = node_props[NodeId].node.LDSSizeInKB * 1024;
i++;
}
@@ -2349,12 +2407,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
* For dGPU the topology node contains Local Memory and it is added by
* the for loop above
*/
if (hsakmt_get_gfxv_by_node_id(NodeId) == GFX_VERSION_KAVERI && i < NumBanks &&
g_props[NodeId].node.LocalMemSize > 0 &&
if (hsakmt_get_gfxv_by_node_id(ctx, NodeId) == GFX_VERSION_KAVERI && i < NumBanks &&
node_props[NodeId].node.LocalMemSize > 0 &&
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_GPUVM, gpu_id,
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
MemoryProperties[i].SizeInBytes = g_props[NodeId].node.LocalMemSize;
MemoryProperties[i].SizeInBytes = node_props[NodeId].node.LocalMemSize;
i++;
}
@@ -2368,7 +2426,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
}
/* Add SVM aperture */
if (hsakmt_topology_is_svm_needed(g_props[NodeId].node.EngineId) && i < NumBanks &&
if (hsakmt_topology_is_svm_needed(node_props[NodeId].node.EngineId) && i < NumBanks &&
hsakmt_fmm_get_aperture_base_and_limit(ctx,
FMM_SVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress,
&aperture_limit) == HSAKMT_STATUS_SUCCESS) {
@@ -2399,6 +2457,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCachePropertiesCtx(HsaKFDContext *ctx,
{
HSAKMT_STATUS err;
uint32_t i;
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
if (!CacheProperties)
return HSAKMT_STATUS_INVALID_PARAMETER;
@@ -2407,19 +2466,19 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCachePropertiesCtx(HsaKFDContext *ctx,
pthread_mutex_lock(&hsakmt_mutex);
/* KFD ADD page 18, snapshot protocol violation */
if (!g_system || NodeId >= g_system->NumNodes) {
if (!topology_ctx->system_props || NodeId >= topology_ctx->system_props->NumNodes) {
err = HSAKMT_STATUS_INVALID_NODE_UNIT;
goto out;
}
if (NumCaches > g_props[NodeId].node.NumCaches) {
if (NumCaches > topology_ctx->node_props[NodeId].node.NumCaches) {
err = HSAKMT_STATUS_INVALID_PARAMETER;
goto out;
}
for (i = 0; i < MIN(g_props[NodeId].node.NumCaches, NumCaches); i++) {
assert(g_props[NodeId].cache);
CacheProperties[i] = g_props[NodeId].cache[i];
for (i = 0; i < MIN(topology_ctx->node_props[NodeId].node.NumCaches, NumCaches); i++) {
assert(topology_ctx->node_props[NodeId].cache);
CacheProperties[i] = topology_ctx->node_props[NodeId].cache[i];
}
err = HSAKMT_STATUS_SUCCESS;
@@ -2429,14 +2488,18 @@ out:
return err;
}
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HSAuint32 NodeId,
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HsaKFDContext *ctx,
HSAuint32 NodeId,
HSAuint32 NumIoLinks,
HsaIoLinkProperties *IoLinkProperties)
{
if (!g_system || !g_props || NodeId >= g_system->NumNodes)
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
if (!topology_ctx->system_props || !topology_ctx->node_props ||
NodeId >= topology_ctx->system_props->NumNodes)
return HSAKMT_STATUS_ERROR;
memcpy(IoLinkProperties, g_props[NodeId].link,
memcpy(IoLinkProperties, topology_ctx->node_props[NodeId].link,
NumIoLinks * sizeof(*IoLinkProperties));
return HSAKMT_STATUS_SUCCESS;
@@ -2448,6 +2511,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkPropertiesCtx(HsaKFDContext *ctx,
HsaIoLinkProperties *IoLinkProperties)
{
HSAKMT_STATUS err;
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
if (!IoLinkProperties)
return HSAKMT_STATUS_INVALID_PARAMETER;
@@ -2457,79 +2521,85 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkPropertiesCtx(HsaKFDContext *ctx,
pthread_mutex_lock(&hsakmt_mutex);
/* KFD ADD page 18, snapshot protocol violation */
if (!g_system || NodeId >= g_system->NumNodes ) {
if (!topology_ctx->system_props || NodeId >= topology_ctx->system_props->NumNodes ) {
err = HSAKMT_STATUS_INVALID_NODE_UNIT;
goto out;
}
if (NumIoLinks > g_props[NodeId].node.NumIOLinks) {
if (NumIoLinks > topology_ctx->node_props[NodeId].node.NumIOLinks) {
err = HSAKMT_STATUS_INVALID_PARAMETER;
goto out;
}
assert(g_props[NodeId].link);
err = hsakmt_topology_get_iolink_props(NodeId, NumIoLinks, IoLinkProperties);
assert(topology_ctx->node_props[NodeId].link);
err = hsakmt_topology_get_iolink_props(ctx, NodeId, NumIoLinks, IoLinkProperties);
out:
pthread_mutex_unlock(&hsakmt_mutex);
return err;
}
/*
 * Returns HSA_GET_GFX_VERSION_FULL() applied to the EngineId.ui32 field of
 * node node_id in the topology snapshot.
 *
 * The g_props line is the pre-refactor global form; the topology_ctx->
 * lines are the per-context replacement, looked up from ctx.
 *
 * NOTE(review): unlike hsakmt_get_device_id_by_node_id(), nothing here
 * checks that a snapshot exists or that node_id < NumNodes -- callers must
 * guarantee both; confirm.
 */
uint32_t hsakmt_get_gfxv_by_node_id(HSAuint32 node_id)
uint32_t hsakmt_get_gfxv_by_node_id(HsaKFDContext *ctx, HSAuint32 node_id)
{
return HSA_GET_GFX_VERSION_FULL(g_props[node_id].node.EngineId.ui32);
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
return HSA_GET_GFX_VERSION_FULL(topology_ctx->node_props[node_id].node.EngineId.ui32);
}
/*
 * Returns node.DeviceId of node node_id in the topology snapshot.
 *
 * The g_* lines are the pre-refactor globals; the topology_ctx-> lines are
 * their per-context replacements, looked up from ctx.
 *
 * Returns 0 when no snapshot is present (either pointer is NULL) or
 * node_id >= NumNodes, so 0 doubles as the "not available" value.
 */
uint16_t hsakmt_get_device_id_by_node_id(HSAuint32 node_id)
uint16_t hsakmt_get_device_id_by_node_id(HsaKFDContext *ctx, HSAuint32 node_id)
{
if (!g_props || !g_system || g_system->NumNodes <= node_id)
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
if (!topology_ctx->node_props || !topology_ctx->system_props ||
topology_ctx->system_props->NumNodes <= node_id)
return 0;
return g_props[node_id].node.DeviceId;
return topology_ctx->node_props[node_id].node.DeviceId;
}
/*
 * Returns true when node node_id has all three of: the
 * Capability.ui32.HSAMMUPresent bit set, a non-zero NumCPUCores, and a
 * non-zero NumFComputeCores.
 *
 * The g_props lines are the pre-refactor global form; the topology_ctx->
 * lines are the per-context replacement, looked up from ctx.
 *
 * NOTE(review): no check that a snapshot exists or that node_id is in
 * range -- callers must guarantee both; confirm.
 */
bool hsakmt_prefer_ats(HSAuint32 node_id)
bool hsakmt_prefer_ats(HsaKFDContext *ctx, HSAuint32 node_id)
{
return g_props[node_id].node.Capability.ui32.HSAMMUPresent
&& g_props[node_id].node.NumCPUCores
&& g_props[node_id].node.NumFComputeCores;
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
return topology_ctx->node_props[node_id].node.Capability.ui32.HSAMMUPresent
&& topology_ctx->node_props[node_id].node.NumCPUCores
&& topology_ctx->node_props[node_id].node.NumFComputeCores;
}
/*
 * Linear search of the topology snapshot for the first node whose
 * node.KFDGpuID equals gpu_id; returns that node's node.DeviceId.
 *
 * The g_* lines are the pre-refactor globals; the topology_ctx-> lines are
 * their per-context replacements, looked up from ctx.
 *
 * Returns 0 when no snapshot is present (either pointer is NULL) or no
 * node matches gpu_id.
 */
uint16_t hsakmt_get_device_id_by_gpu_id(HSAuint32 gpu_id)
uint16_t hsakmt_get_device_id_by_gpu_id(HsaKFDContext *ctx, HSAuint32 gpu_id)
{
unsigned int i;
if (!g_props || !g_system)
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
if (!topology_ctx->node_props || !topology_ctx->system_props)
return 0;
for (i = 0; i < g_system->NumNodes; i++) {
if (g_props[i].node.KFDGpuID == gpu_id)
return g_props[i].node.DeviceId;
for (i = 0; i < topology_ctx->system_props->NumNodes; i++) {
if (topology_ctx->node_props[i].node.KFDGpuID == gpu_id)
return topology_ctx->node_props[i].node.DeviceId;
}
/* No node carries this gpu_id. */
return 0;
}
/*
 * Asks gpu_get_direct_link_cpu() for the node associated with gpu_node
 * (presumably the CPU node it is directly linked to -- the helper is not
 * visible here; confirm) and returns that node id only if the node reports
 * memory.
 *
 * The g_props lines are the pre-refactor global form; the topology_ctx->
 * lines are the per-context replacement, looked up from ctx.
 *
 * Returns INVALID_NODEID when the helper returns -1, or when the
 * SizeInBytes values of all the node's memory banks sum to zero;
 * otherwise returns the helper's node id cast to uint32_t.
 */
uint32_t hsakmt_get_direct_link_cpu(uint32_t gpu_node)
uint32_t hsakmt_get_direct_link_cpu(HsaKFDContext *ctx, HSAuint32 gpu_node)
{
HSAuint64 size = 0;
int32_t cpu_id;
HSAuint32 i;
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
cpu_id = gpu_get_direct_link_cpu(gpu_node, g_props);
cpu_id = gpu_get_direct_link_cpu(gpu_node, topology_ctx->node_props);
if (cpu_id == -1)
return INVALID_NODEID;
/* The memory-bank array is expected to be populated for the returned node. */
assert(g_props[cpu_id].mem);
assert(topology_ctx->node_props[cpu_id].mem);
/* Total the node's bank sizes; a zero total means the node is rejected below. */
for (i = 0; i < g_props[cpu_id].node.NumMemoryBanks; i++)
size += g_props[cpu_id].mem[i].SizeInBytes;
for (i = 0; i < topology_ctx->node_props[cpu_id].node.NumMemoryBanks; i++)
size += topology_ctx->node_props[cpu_id].mem[i].SizeInBytes;
return size ? (uint32_t)cpu_id : INVALID_NODEID;
}
HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
HSAKMT_STATUS hsakmt_validate_nodeid_array(HsaKFDContext *ctx,
uint32_t **gpu_id_array,
uint32_t NumberOfNodes, uint32_t *NodeArray)
{
HSAKMT_STATUS ret;
@@ -2543,7 +2613,7 @@ HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
if (!(*gpu_id_array))
return HSAKMT_STATUS_NO_MEMORY;
for (i = 0; i < NumberOfNodes; i++) {
ret = hsakmt_validate_nodeid(NodeArray[i], *gpu_id_array + i);
ret = hsakmt_validate_nodeid(ctx, NodeArray[i], *gpu_id_array + i);
if (ret != HSAKMT_STATUS_SUCCESS) {
free(*gpu_id_array);
break;
@@ -2553,13 +2623,13 @@ HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
return ret;
}
/*
 * Accessor for the stored num_sysfs_nodes counter.
 *
 * The "inline ... (void)" signature and the bare num_sysfs_nodes return are
 * the pre-refactor global form; the ctx-taking signature and the
 * topology_ctx-> lines are the per-context replacement, which reads the
 * counter from the topology state looked up from ctx.
 */
inline uint32_t hsakmt_get_num_sysfs_nodes(void)
uint32_t hsakmt_get_num_sysfs_nodes(HsaKFDContext *ctx)
{
return num_sysfs_nodes;
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
return topology_ctx->num_sysfs_nodes;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties)
{
return hsaKmtAcquireSystemPropertiesCtx(&hsakmt_primary_kfd_ctx, SystemProperties);