libhsakmt: Implement per-context topology for multi-context KFD support (#2405)
This enhances libhsakmt's capabilities for multi-context KFD support by implementing per-context topology management.
Changes:
* Add hsaKmtGetClockCountersCtx for multi-context support
- Add context-aware version of hsaKmtGetClockCounters
- Original API is retained as a wrapper calling the ctx-version with primary context
* Enable independent debug sessions across multiple KFD contexts
- Create hsa_kfd_debug_context, introduce context-aware debug APIs, and shift debug state to per-context
* Add perf sub-context for per-context performance counter management
- Introduce hsa_kfd_perf_context, move counter properties, add context-aware perf APIs, and update initialization
* Refactor FMM for per-context resource management
- Move multiple FMM-related global variables, including
GPU ID arrays, svm, cpuvm_aperture, and mem_handle_aperture, into hsa_kfd_fmm_context
* Implement per-context topology for complete context isolation
- Migrate global topology data (g_system, g_props, map_user_to_sysfs_node_id)
to per-context hsa_kfd_topology_context structure
- Update all topology functions to accept HsaKFDContext parameter for
context-aware operations (validate_nodeid, get_node_props, get_iolink_props, etc.)
- Refactor topology snapshot management for per-context isolation
- Add context-aware PMC trace access APIs
Signed-off-by: Junhua Shen <Junhua.Shen@amd.com>
此提交包含在:
@@ -32,6 +32,8 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Forward declaration for debug trap ioctl arguments */
|
||||
struct kfd_ioctl_dbg_trap_args;
|
||||
|
||||
/**
|
||||
"Opens" the HSA kernel driver for user-kernel mode communication.
|
||||
@@ -852,8 +854,10 @@ hsaKmtCheckRuntimeDebugSupport(
|
||||
/**
|
||||
Debug ops call primarily used for KFD testing
|
||||
*/
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(
|
||||
struct kfd_ioctl_dbg_trap_args *arg,
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDebugTrapIoctl(
|
||||
struct kfd_ioctl_dbg_trap_args *args,
|
||||
HSA_QUEUEID *Queues,
|
||||
HSAuint64 *DebugReturn
|
||||
);
|
||||
|
||||
@@ -26,38 +26,82 @@
|
||||
#include "libhsakmt.h"
|
||||
#include "hsakmt/linux/kfd_ioctl.h"
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
|
||||
static bool *is_device_debugged;
|
||||
static uint32_t runtime_capabilities_mask = 0;
|
||||
/*
|
||||
* hsa_kfd_debug_context
|
||||
*
|
||||
* Represents the debug state for a KFD context.
|
||||
* Each HsaKFDContext has its own independent debug context.
|
||||
*/
|
||||
struct hsa_kfd_debug_context {
|
||||
/* Array tracking which nodes are being debugged */
|
||||
bool *is_device_debugged;
|
||||
|
||||
HSAKMT_STATUS hsakmt_init_device_debugging_memory(unsigned int NumNodes)
|
||||
/* Runtime debug capabilities mask */
|
||||
uint32_t runtime_capabilities_mask;
|
||||
};
|
||||
|
||||
/*
 * hsakmt_kfdcontext_get_debug_context
 *
 * Return the debug sub-context attached to @ctx, allocating a
 * zero-initialized one on first use and caching it in ctx->debug_context.
 *
 * Returns NULL when @ctx is NULL or when the allocation fails; an error is
 * logged in both cases, so callers must check the result before
 * dereferencing it.
 */
struct hsa_kfd_debug_context *hsakmt_kfdcontext_get_debug_context(HsaKFDContext *ctx)
{
	assert(ctx);
	/* assert() compiles away under NDEBUG, so keep a run-time guard too. */
	if (!ctx) {
		pr_err("Expected a non-null ptr for HsaKFDContext\n");
		return NULL;
	}

	/* Already created by an earlier call: hand back the cached instance. */
	if (ctx->debug_context)
		return ctx->debug_context;

	/* calloc leaves is_device_debugged NULL and the capabilities mask 0. */
	ctx->debug_context = calloc(1, sizeof(struct hsa_kfd_debug_context));
	if (!ctx->debug_context) {
		pr_err("Alloc memory failed for struct hsa_kfd_debug_context size %zu\n",
		       sizeof(struct hsa_kfd_debug_context));
		return NULL;
	}

	return ctx->debug_context;
}
|
||||
|
||||
/*
 * hsakmt_init_device_debugging_memory
 *
 * Allocate the per-node "is being debugged" flag array for @ctx, with every
 * one of the @NumNodes entries starting out false.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success, or HSAKMT_STATUS_NO_MEMORY when
 * either the debug context or the flag array cannot be allocated.
 */
HSAKMT_STATUS hsakmt_init_device_debugging_memory(HsaKFDContext *ctx, unsigned int NumNodes)
{
	struct hsa_kfd_debug_context *debug_ctx = hsakmt_kfdcontext_get_debug_context(ctx);

	if (!debug_ctx)
		return HSAKMT_STATUS_NO_MEMORY;

	/*
	 * Drop any array left over from a previous initialization so that a
	 * repeated call does not leak it (free(NULL) is a no-op).
	 */
	free(debug_ctx->is_device_debugged);

	/*
	 * calloc zero-fills the array (all flags false) and rejects a
	 * NumNodes * sizeof(bool) product that would overflow.
	 */
	debug_ctx->is_device_debugged = calloc(NumNodes, sizeof(bool));
	if (!debug_ctx->is_device_debugged)
		return HSAKMT_STATUS_NO_MEMORY;

	return HSAKMT_STATUS_SUCCESS;
}
|
||||
|
||||
void hsakmt_destroy_device_debugging_memory(void)
|
||||
void hsakmt_destroy_device_debugging_memory(HsaKFDContext *ctx)
|
||||
{
|
||||
if (is_device_debugged) {
|
||||
free(is_device_debugged);
|
||||
is_device_debugged = NULL;
|
||||
struct hsa_kfd_debug_context *debug_ctx = hsakmt_kfdcontext_get_debug_context(ctx);
|
||||
if (!debug_ctx)
|
||||
return;
|
||||
|
||||
if (debug_ctx->is_device_debugged) {
|
||||
free(debug_ctx->is_device_debugged);
|
||||
debug_ctx->is_device_debugged = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bool hsakmt_debug_get_reg_status(uint32_t node_id)
|
||||
bool hsakmt_debug_get_reg_status(HsaKFDContext *ctx, uint32_t node_id)
|
||||
{
|
||||
return is_device_debugged[node_id];
|
||||
struct hsa_kfd_debug_context *debug_ctx = hsakmt_kfdcontext_get_debug_context(ctx);
|
||||
if (!debug_ctx || !debug_ctx->is_device_debugged)
|
||||
return false;
|
||||
|
||||
return debug_ctx->is_device_debugged[node_id];
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgRegister(HSAuint32 NodeId)
|
||||
@@ -66,11 +110,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgRegister(HSAuint32 NodeId)
|
||||
uint32_t gpu_id;
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
if (!is_device_debugged)
|
||||
struct hsa_kfd_debug_context *debug_ctx =
|
||||
hsakmt_kfdcontext_get_debug_context(&hsakmt_primary_kfd_ctx);
|
||||
if (!debug_ctx->is_device_debugged)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
result = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
@@ -94,11 +139,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgUnregister(HSAuint32 NodeId)
|
||||
HSAKMT_STATUS result;
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
if (!is_device_debugged)
|
||||
struct hsa_kfd_debug_context *debug_ctx =
|
||||
hsakmt_kfdcontext_get_debug_context(&hsakmt_primary_kfd_ctx);
|
||||
if (!debug_ctx->is_device_debugged)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
result = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
@@ -126,7 +172,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgWavefrontControl(HSAuint32 NodeId,
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
result = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
@@ -195,11 +241,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgAddressWatch(HSAuint32 NodeId,
|
||||
uint32_t watch_event_items = WatchEvent != NULL ? NumWatchPoints:0;
|
||||
|
||||
struct kfd_ioctl_dbg_address_watch_args *args;
|
||||
HSAuint32 i = 0;
|
||||
HSAuint32 i = 0;
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
result = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
@@ -268,19 +314,19 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgAddressWatch(HSAuint32 NodeId,
|
||||
#define HSA_RUNTIME_ENABLE_MAX_MAJOR 1
|
||||
#define HSA_RUNTIME_ENABLE_MIN_MINOR 13
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtCheckRuntimeDebugSupport(void) {
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtCheckRuntimeDebugSupportCtx(HsaKFDContext *ctx) {
|
||||
HsaNodeProperties node = {0};
|
||||
HsaSystemProperties props = {0};
|
||||
HsaVersionInfo versionInfo = {0};
|
||||
|
||||
memset(&node, 0x00, sizeof(node));
|
||||
memset(&props, 0x00, sizeof(props));
|
||||
if (hsaKmtAcquireSystemProperties(&props))
|
||||
if (hsaKmtAcquireSystemPropertiesCtx(ctx, &props))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
//the firmware of gpu node doesn't support the debugger, disable it.
|
||||
for (uint32_t i = 0; i < props.NumNodes; i++) {
|
||||
if (hsaKmtGetNodeProperties(i, &node))
|
||||
if (hsaKmtGetNodePropertiesCtx(ctx, i, &node))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
//ignore cpu node
|
||||
@@ -302,12 +348,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCheckRuntimeDebugSupport(void) {
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnableCtx(HsaKFDContext *ctx,
|
||||
void *rDebug,
|
||||
bool setupTtmp)
|
||||
{
|
||||
struct kfd_ioctl_runtime_enable_args args = {0};
|
||||
HSAKMT_STATUS result = hsaKmtCheckRuntimeDebugSupport();
|
||||
struct hsa_kfd_debug_context *debug_ctx = hsakmt_kfdcontext_get_debug_context(ctx);
|
||||
|
||||
struct kfd_ioctl_runtime_enable_args args = {0};
|
||||
HSAKMT_STATUS result = hsaKmtCheckRuntimeDebugSupportCtx(ctx);
|
||||
if (result)
|
||||
return result;
|
||||
|
||||
@@ -316,7 +364,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
|
||||
((setupTtmp) ? KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK : 0);
|
||||
args.r_debug = (HSAuint64)rDebug;
|
||||
|
||||
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RUNTIME_ENABLE, &args);
|
||||
long err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_RUNTIME_ENABLE, &args);
|
||||
|
||||
if (err) {
|
||||
if (errno == EBUSY)
|
||||
@@ -324,15 +372,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
|
||||
else
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
runtime_capabilities_mask= args.capabilities_mask;
|
||||
debug_ctx->runtime_capabilities_mask= args.capabilities_mask;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisable(void)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisableCtx(HsaKFDContext *ctx)
|
||||
{
|
||||
struct kfd_ioctl_runtime_enable_args args = {0};
|
||||
HSAKMT_STATUS result = hsaKmtCheckRuntimeDebugSupport();
|
||||
HSAKMT_STATUS result = hsaKmtCheckRuntimeDebugSupportCtx(ctx);
|
||||
|
||||
if (result)
|
||||
return result;
|
||||
@@ -340,19 +388,23 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisable(void)
|
||||
memset(&args, 0x00, sizeof(args));
|
||||
args.mode_mask = 0; //Disable
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RUNTIME_ENABLE, &args))
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_RUNTIME_ENABLE, &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetRuntimeCapabilities(HSAuint32 *caps_mask)
|
||||
/*
 * hsaKmtGetRuntimeCapabilitiesCtx
 *
 * Report the runtime debug capabilities mask cached in the debug context of
 * @ctx. The mask is the value stored by hsaKmtRuntimeEnableCtx() after a
 * successful AMDKFD_IOC_RUNTIME_ENABLE ioctl; it is 0 until then because the
 * debug context is zero-allocated.
 *
 * @ctx:       KFD context to query.
 * @caps_mask: receives the capabilities mask.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success,
 * HSAKMT_STATUS_INVALID_PARAMETER when @caps_mask is NULL, or
 * HSAKMT_STATUS_NO_MEMORY when the debug context cannot be obtained.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetRuntimeCapabilitiesCtx(HsaKFDContext *ctx,
							 HSAuint32 *caps_mask)
{
	struct hsa_kfd_debug_context *debug_ctx;

	if (!caps_mask)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* The getter returns NULL on allocation failure; never dereference blindly. */
	debug_ctx = hsakmt_kfdcontext_get_debug_context(ctx);
	if (!debug_ctx)
		return HSAKMT_STATUS_NO_MEMORY;

	*caps_mask = debug_ctx->runtime_capabilities_mask;
	return HSAKMT_STATUS_SUCCESS;
}
|
||||
|
||||
static HSAKMT_STATUS dbg_trap_get_device_data(void *data,
|
||||
static HSAKMT_STATUS dbg_trap_get_device_data(HsaKFDContext *ctx,
|
||||
void *data,
|
||||
uint32_t *n_entries,
|
||||
uint32_t entry_size)
|
||||
{
|
||||
@@ -363,14 +415,15 @@ static HSAKMT_STATUS dbg_trap_get_device_data(void *data,
|
||||
args.device_snapshot.entry_size = entry_size;
|
||||
args.op = KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT;
|
||||
args.pid = getpid();
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
*n_entries = args.device_snapshot.num_devices;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS dbg_trap_get_queue_data(void *data,
|
||||
static HSAKMT_STATUS dbg_trap_get_queue_data(HsaKFDContext *ctx,
|
||||
void *data,
|
||||
uint32_t *n_entries,
|
||||
uint32_t entry_size,
|
||||
uint32_t *queue_ids)
|
||||
@@ -384,7 +437,7 @@ static HSAKMT_STATUS dbg_trap_get_queue_data(void *data,
|
||||
args.queue_snapshot.snapshot_buf_ptr = (uint64_t) data;
|
||||
args.pid = getpid();
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
*n_entries = args.queue_snapshot.num_queues;
|
||||
@@ -398,7 +451,8 @@ static HSAKMT_STATUS dbg_trap_get_queue_data(void *data,
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS dbg_trap_suspend_queues(uint32_t *queue_ids,
|
||||
static HSAKMT_STATUS dbg_trap_suspend_queues(HsaKFDContext *ctx,
|
||||
uint32_t *queue_ids,
|
||||
uint32_t num_queues)
|
||||
{
|
||||
struct kfd_ioctl_dbg_trap_args args = {0};
|
||||
@@ -410,7 +464,7 @@ static HSAKMT_STATUS dbg_trap_suspend_queues(uint32_t *queue_ids,
|
||||
args.op = KFD_IOC_DBG_TRAP_SUSPEND_QUEUES;
|
||||
args.pid = getpid();
|
||||
|
||||
r = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args);
|
||||
r = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, &args);
|
||||
if (r < 0)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
@@ -420,7 +474,8 @@ static HSAKMT_STATUS dbg_trap_suspend_queues(uint32_t *queue_ids,
|
||||
/* Debugger support has been in KFD ABI 1.13. */
|
||||
#define KFD_MINOR_MIN_DEBUG 13
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnableCtx(HsaKFDContext *ctx,
|
||||
void **runtime_info,
|
||||
HSAuint32 *data_size)
|
||||
{
|
||||
struct kfd_ioctl_dbg_trap_args args = {0};
|
||||
@@ -429,7 +484,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
|
||||
CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);
|
||||
*data_size = sizeof(struct kfd_runtime_info);
|
||||
args.enable.rinfo_size = *data_size;
|
||||
args.enable.dbg_fd = hsakmt_primary_kfd_ctx.fd;
|
||||
args.enable.dbg_fd = ctx->fd;
|
||||
*runtime_info = malloc(args.enable.rinfo_size);
|
||||
if (!*runtime_info)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
@@ -437,30 +492,31 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
|
||||
args.op = KFD_IOC_DBG_TRAP_ENABLE;
|
||||
args.pid = getpid();
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args)) {
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, &args)) {
|
||||
free(*runtime_info);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgDisable(void)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgDisableCtx(HsaKFDContext *ctx)
|
||||
{
|
||||
struct kfd_ioctl_dbg_trap_args args = {0};
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);
|
||||
args.enable.dbg_fd = hsakmt_primary_kfd_ctx.fd;
|
||||
args.enable.dbg_fd = ctx->fd;
|
||||
args.op = KFD_IOC_DBG_TRAP_DISABLE;
|
||||
args.pid = getpid();
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetDeviceData(void **data,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetDeviceDataCtx(HsaKFDContext *ctx,
|
||||
void **data,
|
||||
HSAuint32 *n_entries,
|
||||
HSAuint32 *entry_size)
|
||||
{
|
||||
@@ -473,14 +529,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetDeviceData(void **data,
|
||||
*data = malloc(*entry_size * *n_entries);
|
||||
if (!*data)
|
||||
return ret;
|
||||
ret = dbg_trap_get_device_data(*data, n_entries, *entry_size);
|
||||
ret = dbg_trap_get_device_data(ctx, *data, n_entries, *entry_size);
|
||||
if (ret)
|
||||
free(*data);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueData(void **data,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueDataCtx(HsaKFDContext *ctx,
|
||||
void **data,
|
||||
HSAuint32 *n_entries,
|
||||
HSAuint32 *entry_size,
|
||||
bool suspend_queues)
|
||||
@@ -491,7 +548,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueData(void **data,
|
||||
CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);
|
||||
*entry_size = sizeof(struct kfd_queue_snapshot_entry);
|
||||
*n_entries = 0;
|
||||
if (dbg_trap_get_queue_data(NULL, n_entries, *entry_size, NULL))
|
||||
if (dbg_trap_get_queue_data(ctx, NULL, n_entries, *entry_size, NULL))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
*data = malloc(*n_entries * *entry_size);
|
||||
if (!*data)
|
||||
@@ -499,11 +556,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueData(void **data,
|
||||
if (suspend_queues && *n_entries)
|
||||
queue_ids = (uint32_t *)malloc(sizeof(uint32_t) * *n_entries);
|
||||
if (!queue_ids ||
|
||||
dbg_trap_get_queue_data(*data, n_entries, *entry_size, queue_ids))
|
||||
dbg_trap_get_queue_data(ctx, *data, n_entries, *entry_size, queue_ids))
|
||||
goto free_data;
|
||||
if (queue_ids) {
|
||||
if (dbg_trap_suspend_queues(queue_ids, *n_entries) ||
|
||||
dbg_trap_get_queue_data(*data, n_entries, *entry_size, NULL))
|
||||
if (dbg_trap_suspend_queues(ctx, queue_ids, *n_entries) ||
|
||||
dbg_trap_get_queue_data(ctx, *data, n_entries, *entry_size, NULL))
|
||||
goto free_data;
|
||||
free(queue_ids);
|
||||
}
|
||||
@@ -516,9 +573,10 @@ free_data:
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *args,
|
||||
HSA_QUEUEID *Queues,
|
||||
HSAuint64 *DebugReturn)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctlCtx(HsaKFDContext *ctx,
|
||||
struct kfd_ioctl_dbg_trap_args *args,
|
||||
HSA_QUEUEID *Queues,
|
||||
HSAuint64 *DebugReturn)
|
||||
{
|
||||
HSAKMT_STATUS result;
|
||||
|
||||
@@ -540,7 +598,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *arg
|
||||
free(queue_ids);
|
||||
}
|
||||
|
||||
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, args);
|
||||
long err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DBG_TRAP, args);
|
||||
if (DebugReturn)
|
||||
*DebugReturn = err;
|
||||
|
||||
@@ -557,3 +615,58 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *arg
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Legacy entry point: runs the runtime-debug support check against the
 * primary KFD context and returns that check's status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCheckRuntimeDebugSupport(void)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtCheckRuntimeDebugSupportCtx(primary);
}
|
||||
|
||||
/*
 * Legacy entry point: forwards @rDebug and @setupTtmp to
 * hsaKmtRuntimeEnableCtx() using the primary KFD context and returns its
 * status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
					    bool setupTtmp)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRuntimeEnableCtx(primary, rDebug, setupTtmp);
}
|
||||
|
||||
/*
 * Legacy entry point: disables runtime debugging on the primary KFD context
 * by delegating to hsaKmtRuntimeDisableCtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisable(void)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRuntimeDisableCtx(primary);
}
|
||||
|
||||
/*
 * Legacy entry point: fetches the runtime capabilities mask of the primary
 * KFD context into @caps_mask via hsaKmtGetRuntimeCapabilitiesCtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetRuntimeCapabilities(HSAuint32 *caps_mask)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtGetRuntimeCapabilitiesCtx(primary, caps_mask);
}
|
||||
|
||||
/*
 * Legacy entry point: enables the debug trap on the primary KFD context.
 * @runtime_info and @data_size are passed straight through to
 * hsaKmtDbgEnableCtx(), whose status is returned unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
					HSAuint32 *data_size)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDbgEnableCtx(primary, runtime_info, data_size);
}
|
||||
|
||||
/*
 * Legacy entry point: disables the debug trap on the primary KFD context by
 * delegating to hsaKmtDbgDisableCtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgDisable(void)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDbgDisableCtx(primary);
}
|
||||
|
||||
/*
 * Legacy entry point: requests the device snapshot for the primary KFD
 * context. All three output pointers are handed unchanged to
 * hsaKmtDbgGetDeviceDataCtx(), whose status is returned as-is.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetDeviceData(void **data,
					       HSAuint32 *n_entries,
					       HSAuint32 *entry_size)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDbgGetDeviceDataCtx(primary, data, n_entries, entry_size);
}
|
||||
|
||||
/*
 * Legacy entry point: requests the queue snapshot for the primary KFD
 * context. The output pointers and the @suspend_queues flag are handed
 * unchanged to hsaKmtDbgGetQueueDataCtx(), whose status is returned as-is.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueData(void **data,
					      HSAuint32 *n_entries,
					      HSAuint32 *entry_size,
					      bool suspend_queues)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDbgGetQueueDataCtx(primary, data, n_entries,
					entry_size, suspend_queues);
}
|
||||
|
||||
/*
 * Legacy entry point: issues a debug-trap operation on the primary KFD
 * context. @args, @Queues and @DebugReturn are passed unchanged to
 * hsaKmtDebugTrapIoctlCtx(), whose status is returned as-is.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *args,
					     HSA_QUEUEID *Queues,
					     HSAuint64 *DebugReturn)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDebugTrapIoctlCtx(primary, args, Queues, DebugReturn);
}
|
||||
|
||||
@@ -307,7 +307,7 @@ static HSAKMT_STATUS get_mem_info_svm_api(HsaKFDContext *ctx, uint64_t address,
|
||||
args->attrs[i].value == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
|
||||
node_id = args->attrs[i].value;
|
||||
else
|
||||
hsakmt_gpuid_to_nodeid(args->attrs[i].value, &node_id);
|
||||
hsakmt_gpuid_to_nodeid(ctx, args->attrs[i].value, &node_id);
|
||||
switch (args->attrs[i].type) {
|
||||
case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
|
||||
pr_err("Preferred location for address 0x%lx is Node id %d\n",
|
||||
@@ -359,7 +359,7 @@ static void analysis_memory_exception(HsaKFDContext *ctx,
|
||||
uint32_t node_id = 0;
|
||||
unsigned int i;
|
||||
|
||||
hsakmt_gpuid_to_nodeid(memory_exception_data->gpu_id, &node_id);
|
||||
hsakmt_gpuid_to_nodeid(ctx, memory_exception_data->gpu_id, &node_id);
|
||||
pr_err("Memory exception on virtual address 0x%lx, ", addr);
|
||||
pr_err("node id %d : ", node_id);
|
||||
if (memory_exception_data->failure.NotPresent)
|
||||
@@ -468,7 +468,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_ExtCtx(HsaKFDContext *ctx,
|
||||
if (Events[i]->EventData.EventType == HSA_EVENTTYPE_MEMORY &&
|
||||
event_data[i].memory_exception_data.gpu_id) {
|
||||
Events[i]->EventData.EventData.MemoryAccessFault.VirtualAddress = event_data[i].memory_exception_data.va;
|
||||
result = hsakmt_gpuid_to_nodeid(event_data[i].memory_exception_data.gpu_id, &Events[i]->EventData.EventData.MemoryAccessFault.NodeId);
|
||||
result = hsakmt_gpuid_to_nodeid(ctx, event_data[i].memory_exception_data.gpu_id, &Events[i]->EventData.EventData.MemoryAccessFault.NodeId);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
goto out;
|
||||
Events[i]->EventData.EventData.MemoryAccessFault.Failure.NotPresent = event_data[i].memory_exception_data.failure.NotPresent;
|
||||
@@ -483,7 +483,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_ExtCtx(HsaKFDContext *ctx,
|
||||
} else if (Events[i]->EventData.EventType == HSA_EVENTTYPE_HW_EXCEPTION &&
|
||||
event_data[i].hw_exception_data.gpu_id) {
|
||||
|
||||
result = hsakmt_gpuid_to_nodeid(event_data[i].hw_exception_data.gpu_id, &Events[i]->EventData.EventData.HwException.NodeId);
|
||||
result = hsakmt_gpuid_to_nodeid(ctx, event_data[i].hw_exception_data.gpu_id, &Events[i]->EventData.EventData.HwException.NodeId);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
goto out;
|
||||
|
||||
@@ -515,7 +515,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMICtx(HsaKFDContext *ctx, HSAuint32 NodeId, i
|
||||
|
||||
pr_debug("[%s] node %d\n", __func__, NodeId);
|
||||
|
||||
result = hsakmt_validate_nodeid(NodeId, &gpuid);
|
||||
result = hsakmt_validate_nodeid(ctx, NodeId, &gpuid);
|
||||
if (result != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
|
||||
return result;
|
||||
|
||||
檔案差異因為檔案過大而無法顯示
載入差異
@@ -740,6 +740,156 @@ hsaKmtAllocQueueGWSCtx(
|
||||
HSAuint32 *firstGWS //OUT
|
||||
);
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRuntimeEnableCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void* rDebug, //IN
|
||||
bool setupTtmp //IN
|
||||
);
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRuntimeDisableCtx(
|
||||
HsaKFDContext *ctx //IN
|
||||
);
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetRuntimeCapabilitiesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 *caps_mask //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Enable debug trap.
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDbgEnableCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void **runtime_info, //Out
|
||||
HSAuint32 *data_size //Out
|
||||
);
|
||||
|
||||
/**
|
||||
Disable debug trap.
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDbgDisableCtx(
|
||||
HsaKFDContext *ctx //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Get device snapshot.
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDbgGetDeviceDataCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void **data, //Out
|
||||
HSAuint32 *n_entries, //Out
|
||||
HSAuint32 *entry_size //Out
|
||||
);
|
||||
|
||||
/**
|
||||
Get queues snapshot.
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDbgGetQueueDataCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void **data, //Out
|
||||
HSAuint32 *n_entries, //Out
|
||||
HSAuint32 *entry_size, //Out
|
||||
bool suspend_queues //In
|
||||
);
|
||||
|
||||
/**
|
||||
Check whether gpu firmware and kernel support debugging
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtCheckRuntimeDebugSupportCtx(
|
||||
HsaKFDContext *ctx //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Debug ops call primarily used for KFD testing
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDebugTrapIoctlCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
struct kfd_ioctl_dbg_trap_args *args, //IN/OUT
|
||||
HSA_QUEUEID *Queues, //IN
|
||||
HSAuint64 *DebugReturn //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Gets GPU and CPU clock counters for particular Node
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetClockCountersCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HsaClockCounters *Counters); //OUT
|
||||
|
||||
/**
|
||||
Retrieves information on the available HSA counters
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcGetCounterPropertiesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HsaCounterProperties** CounterProperties //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Registers a set of (HW) counters to be used for tracing/profiling
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcRegisterTraceCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint32 NumberOfCounters, //IN
|
||||
HsaCounter* Counters, //IN
|
||||
HsaPmcTraceRoot* TraceRoot //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Allows a user mode process to get exclusive access to the defined set of (HW) counters
|
||||
used for tracing/profiling
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcAcquireTraceAccessCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HSATraceId TraceId //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Allows a user mode process to release exclusive access to the defined set of (HW) counters
|
||||
used for tracing/profiling
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtPmcReleaseTraceAccessCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HSATraceId TraceId //IN
|
||||
);
|
||||
|
||||
/* Helper functions for calling KFD SVM ioctl */
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
|
||||
@@ -282,7 +282,7 @@ void model_init(void)
|
||||
for (unsigned node_id = 0; node_id < props.NumNodes; node_id++)
|
||||
{
|
||||
HsaNodeProperties node_props;
|
||||
result = hsakmt_topology_get_node_props(node_id, &node_props);
|
||||
result = hsakmt_topology_get_node_props(&hsakmt_primary_kfd_ctx, node_id, &node_props);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "model: Failed to get node %u properties\n", node_id);
|
||||
|
||||
@@ -37,9 +37,12 @@ void hsakmt_kfdcontext_init_context(int fd, HsaKFDContext *ctx)
|
||||
assert(ctx);
|
||||
|
||||
ctx->fd = fd;
|
||||
ctx->topology_context = NULL;
|
||||
ctx->queue_context = NULL;
|
||||
ctx->fmm_context = NULL;
|
||||
ctx->event_context = NULL;
|
||||
ctx->debug_context = NULL;
|
||||
ctx->perf_context = NULL;
|
||||
}
|
||||
|
||||
void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx)
|
||||
@@ -47,6 +50,10 @@ void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx)
|
||||
if (!ctx)
|
||||
return;
|
||||
|
||||
if (ctx->topology_context) {
|
||||
free(ctx->topology_context);
|
||||
ctx->topology_context = NULL;
|
||||
}
|
||||
if (ctx->queue_context) {
|
||||
free(ctx->queue_context);
|
||||
ctx->queue_context = NULL;
|
||||
@@ -59,5 +66,13 @@ void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx)
|
||||
free(ctx->event_context);
|
||||
ctx->event_context = NULL;
|
||||
}
|
||||
if (ctx->debug_context) {
|
||||
free(ctx->debug_context);
|
||||
ctx->debug_context = NULL;
|
||||
}
|
||||
if (ctx->perf_context) {
|
||||
free(ctx->perf_context);
|
||||
ctx->perf_context = NULL;
|
||||
}
|
||||
ctx->fd = -1;
|
||||
}
|
||||
|
||||
@@ -28,9 +28,12 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
struct hsa_kfd_topology_context;
|
||||
struct hsa_kfd_queue_context;
|
||||
struct hsa_kfd_fmm_context;
|
||||
struct hsa_kfd_event_context;
|
||||
struct hsa_kfd_debug_context;
|
||||
struct hsa_kfd_perf_context;
|
||||
|
||||
/*
|
||||
* HsaKFDContext
|
||||
@@ -52,6 +55,9 @@ typedef struct _HsaKFDContext
|
||||
/* File descriptor for the KFD device */
|
||||
int fd;
|
||||
|
||||
/* Topology context for managing system topology information */
|
||||
struct hsa_kfd_topology_context *topology_context;
|
||||
|
||||
/* Queue context for managing user queues */
|
||||
struct hsa_kfd_queue_context *queue_context;
|
||||
|
||||
@@ -60,6 +66,12 @@ typedef struct _HsaKFDContext
|
||||
|
||||
/* Event context for managing events */
|
||||
struct hsa_kfd_event_context *event_context;
|
||||
|
||||
/* Debug context for managing debug operations */
|
||||
struct hsa_kfd_debug_context *debug_context;
|
||||
|
||||
/* perf context for managing perf operations */
|
||||
struct hsa_kfd_perf_context *perf_context;
|
||||
} HsaKFDContext;
|
||||
|
||||
// Initialize a pre-allocated HsaKFDContext with the given file descriptor
|
||||
@@ -67,8 +79,10 @@ void hsakmt_kfdcontext_init_context(int fd, HsaKFDContext *ctx);
|
||||
// Release all resources associated with the given KFD context
|
||||
void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx);
|
||||
|
||||
struct hsa_kfd_topology_context *hsakmt_kfdcontext_get_topology_context(HsaKFDContext *ctx);
|
||||
struct hsa_kfd_fmm_context *hsakmt_kfdcontext_get_fmm_context(HsaKFDContext *ctx);
|
||||
struct hsa_kfd_queue_context *hsakmt_kfdcontext_get_queue_context(HsaKFDContext *ctx);
|
||||
struct hsa_kfd_event_context *hsakmt_kfdcontext_get_event_context(HsaKFDContext *ctx);
|
||||
|
||||
struct hsa_kfd_debug_context *hsakmt_kfdcontext_get_debug_context(HsaKFDContext *ctx);
|
||||
struct hsa_kfd_perf_context *hsakmt_kfdcontext_get_perf_context(HsaKFDContext *ctx);
|
||||
#endif /* _KFDCONTEXT_H_ */
|
||||
|
||||
@@ -188,23 +188,26 @@ HSAKMT_STATUS hsakmt_init_kfd_version(void);
|
||||
|
||||
#define IS_SOC15(gfxv) ((gfxv) >= GFX_VERSION_VEGA10)
|
||||
|
||||
HSAKMT_STATUS hsakmt_validate_nodeid(uint32_t nodeid, uint32_t *gpu_id);
|
||||
HSAKMT_STATUS hsakmt_gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id);
|
||||
uint32_t hsakmt_get_gfxv_by_node_id(HSAuint32 node_id);
|
||||
bool hsakmt_prefer_ats(HSAuint32 node_id);
|
||||
uint16_t hsakmt_get_device_id_by_node_id(HSAuint32 node_id);
|
||||
uint16_t hsakmt_get_device_id_by_gpu_id(HSAuint32 gpu_id);
|
||||
uint32_t hsakmt_get_direct_link_cpu(uint32_t gpu_node);
|
||||
HSAKMT_STATUS hsakmt_validate_nodeid(HsaKFDContext *ctx, uint32_t nodeid, uint32_t *gpu_id);
|
||||
HSAKMT_STATUS hsakmt_gpuid_to_nodeid(HsaKFDContext *ctx, uint32_t gpu_id, uint32_t* node_id);
|
||||
uint32_t hsakmt_get_gfxv_by_node_id(HsaKFDContext *ctx, HSAuint32 node_id);
|
||||
bool hsakmt_prefer_ats(HsaKFDContext *ctx, HSAuint32 node_id);
|
||||
uint16_t hsakmt_get_device_id_by_node_id(HsaKFDContext *ctx, HSAuint32 node_id);
|
||||
uint16_t hsakmt_get_device_id_by_gpu_id(HsaKFDContext *ctx, HSAuint32 gpu_id);
|
||||
uint32_t hsakmt_get_direct_link_cpu(HsaKFDContext *ctx, uint32_t gpu_node);
|
||||
int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id);
|
||||
HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
|
||||
HSAKMT_STATUS hsakmt_validate_nodeid_array(HsaKFDContext *ctx,
|
||||
uint32_t **gpu_id_array,
|
||||
uint32_t NumberOfNodes, uint32_t *NodeArray);
|
||||
|
||||
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx, HsaSystemProperties *props);
|
||||
HSAKMT_STATUS hsakmt_topology_get_node_props(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS hsakmt_topology_get_node_props(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HsaNodeProperties *NodeProperties);
|
||||
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HSAuint32 NodeId,
|
||||
HSAuint32 NumIoLinks,
|
||||
HsaIoLinkProperties *IoLinkProperties);
|
||||
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSAuint32 NumIoLinks,
|
||||
HsaIoLinkProperties *IoLinkProperties);
|
||||
void hsakmt_topology_setup_is_dgpu_param(HsaNodeProperties *props);
|
||||
bool hsakmt_topology_is_svm_needed(HSA_ENGINE_ID EngineId);
|
||||
|
||||
@@ -212,7 +215,7 @@ HSAuint32 hsakmt_PageSizeFromFlags(unsigned int pageSizeFlags);
|
||||
HSAuint64 MapDrmPerm(HsaMemoryMapFlags flags);
|
||||
|
||||
void* hsakmt_allocate_exec_aligned_memory_gpu(HsaKFDContext *ctx,
|
||||
uint32_t size, uint32_t align,
|
||||
uint32_t size, uint32_t align,
|
||||
uint32_t gpu_id,
|
||||
uint32_t NodeId, bool NonPaged,
|
||||
bool DeviceLocal, bool Uncached);
|
||||
@@ -221,11 +224,11 @@ void hsakmt_free_exec_aligned_memory_gpu(HsaKFDContext *ctx,
|
||||
HSAKMT_STATUS hsakmt_init_process_doorbells(HsaKFDContext *ctx,
|
||||
unsigned int NumNodes);
|
||||
void hsakmt_destroy_process_doorbells(HsaKFDContext *ctx);
|
||||
HSAKMT_STATUS hsakmt_init_device_debugging_memory(unsigned int NumNodes);
|
||||
void hsakmt_destroy_device_debugging_memory(void);
|
||||
bool hsakmt_debug_get_reg_status(uint32_t node_id);
|
||||
HSAKMT_STATUS hsakmt_init_counter_props(unsigned int NumNodes);
|
||||
void hsakmt_destroy_counter_props(void);
|
||||
HSAKMT_STATUS hsakmt_init_device_debugging_memory(HsaKFDContext *ctx, unsigned int NumNodes);
|
||||
void hsakmt_destroy_device_debugging_memory(HsaKFDContext *ctx);
|
||||
bool hsakmt_debug_get_reg_status(HsaKFDContext *ctx, uint32_t node_id);
|
||||
HSAKMT_STATUS hsakmt_init_counter_props(HsaKFDContext *ctx, unsigned int NumNodes);
|
||||
void hsakmt_destroy_counter_props(HsaKFDContext *ctx);
|
||||
uint32_t *hsakmt_convert_queue_ids(HSAuint32 NumQueues, HSA_QUEUEID *Queues);
|
||||
|
||||
extern int hsakmt_ioctl(int fd, unsigned long request, void *arg);
|
||||
@@ -250,7 +253,7 @@ void hsakmt_clear_events_page(HsaKFDContext *ctx);
|
||||
void hsakmt_fmm_clear_all_mem(HsaKFDContext *ctx);
|
||||
void hsakmt_fmm_clear_all_aperture(HsaKFDContext *ctx);
|
||||
void hsakmt_clear_process_doorbells(HsaKFDContext *ctx);
|
||||
uint32_t hsakmt_get_num_sysfs_nodes(void);
|
||||
uint32_t hsakmt_get_num_sysfs_nodes(HsaKFDContext *ctx);
|
||||
|
||||
bool hsakmt_is_forked_child(void);
|
||||
|
||||
|
||||
@@ -55,11 +55,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicyCtx(HsaKFDContext *ctx,
|
||||
pr_debug("[%s] node %d; default %d; alternate %d\n",
|
||||
__func__, Node, DefaultPolicy, AlternatePolicy);
|
||||
|
||||
result = hsakmt_validate_nodeid(Node, &gpu_id);
|
||||
result = hsakmt_validate_nodeid(ctx, Node, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
if (hsakmt_get_gfxv_by_node_id(Node) != GFX_VERSION_KAVERI)
|
||||
if (hsakmt_get_gfxv_by_node_id(ctx, Node) != GFX_VERSION_KAVERI)
|
||||
/* This is a legacy API useful on Kaveri only. On dGPU
|
||||
* the alternate aperture is setup and used
|
||||
* automatically for coherent allocations. Don't let
|
||||
@@ -137,7 +137,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlignCtx(HsaKFDContext *ctx,
|
||||
|
||||
pr_debug("[%s] node %d\n", __func__, PreferredNode);
|
||||
|
||||
result = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
|
||||
result = hsakmt_validate_nodeid(ctx, PreferredNode, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_err("[%s] invalid node ID: %d\n", __func__, PreferredNode);
|
||||
return result;
|
||||
@@ -254,7 +254,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemoryCtx(HsaKFDContext *ctx,
|
||||
|
||||
pr_debug("[%s] node %d\n", __func__, Node);
|
||||
|
||||
result = hsakmt_validate_nodeid(Node, &args.gpu_id);
|
||||
result = hsakmt_validate_nodeid(ctx, Node, &args.gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_err("[%s] invalid node ID: %d\n", __func__, Node);
|
||||
return result;
|
||||
@@ -304,7 +304,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodesCtx(HsaKFDContext *ctx,
|
||||
/* TODO: support mixed APU and dGPU configurations */
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
|
||||
ret = hsakmt_validate_nodeid_array(&gpu_id_array,
|
||||
ret = hsakmt_validate_nodeid_array(ctx, &gpu_id_array,
|
||||
NumberOfNodes, NodeArray);
|
||||
|
||||
if (ret == HSAKMT_STATUS_SUCCESS) {
|
||||
@@ -385,7 +385,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExtCtx(HsaKFDContext
|
||||
pr_debug("[%s] number of nodes %lu\n", __func__, NumberOfNodes);
|
||||
|
||||
if (NodeArray != NULL || NumberOfNodes != 0) {
|
||||
ret = hsakmt_validate_nodeid_array(&gpu_id_array,
|
||||
ret = hsakmt_validate_nodeid_array(ctx, &gpu_id_array,
|
||||
NumberOfNodes, NodeArray);
|
||||
}
|
||||
|
||||
@@ -467,7 +467,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodesCtx(HsaKFDContext *ctx,
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
if (NodeArray) {
|
||||
ret = hsakmt_validate_nodeid_array(&gpu_id_array, NumberOfNodes, NodeArray);
|
||||
ret = hsakmt_validate_nodeid_array(ctx, &gpu_id_array, NumberOfNodes, NodeArray);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
goto error;
|
||||
}
|
||||
@@ -567,7 +567,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodesCtx(HsaKFDContext *ctx,
|
||||
return hsaKmtMapMemoryToGPUCtx(ctx, MemoryAddress,
|
||||
MemorySizeInBytes, AlternateVAGPU);
|
||||
|
||||
ret = hsakmt_validate_nodeid_array(&gpu_id_array,
|
||||
ret = hsakmt_validate_nodeid_array(ctx, &gpu_id_array,
|
||||
NumberOfNodes, NodeArray);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
return ret;
|
||||
@@ -633,7 +633,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfigCtx(HsaKFDContext *ctx,
|
||||
|
||||
pr_debug("[%s] node %d\n", __func__, NodeId);
|
||||
|
||||
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
result = hsakmt_validate_nodeid(ctx, NodeId, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
|
||||
@@ -106,7 +106,7 @@ static void clear_after_fork(HsaKFDContext *ctx)
|
||||
hsakmt_clear_process_doorbells(ctx);
|
||||
hsakmt_clear_events_page(ctx);
|
||||
hsakmt_fmm_clear_all_mem(ctx);
|
||||
hsakmt_destroy_device_debugging_memory();
|
||||
hsakmt_destroy_device_debugging_memory(ctx);
|
||||
|
||||
int fd = ctx->fd;
|
||||
if (fd >= 0) {
|
||||
@@ -226,10 +226,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFDCtx(HsaKFDContext **pCtx)
|
||||
|
||||
hsakmt_kfd_open_count = 1;
|
||||
|
||||
if (hsakmt_init_device_debugging_memory(sys_props.NumNodes) != HSAKMT_STATUS_SUCCESS)
|
||||
if (hsakmt_init_device_debugging_memory(&hsakmt_primary_kfd_ctx, sys_props.NumNodes) != HSAKMT_STATUS_SUCCESS)
|
||||
pr_warn("Insufficient Memory. Debugging unavailable\n");
|
||||
|
||||
hsakmt_init_counter_props(sys_props.NumNodes);
|
||||
hsakmt_init_counter_props(&hsakmt_primary_kfd_ctx, sys_props.NumNodes);
|
||||
*pCtx = &hsakmt_primary_kfd_ctx;
|
||||
|
||||
if (!atfork_installed) {
|
||||
@@ -269,8 +269,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFDCtx(void)
|
||||
|
||||
if (hsakmt_kfd_open_count > 0) {
|
||||
if (--hsakmt_kfd_open_count == 0) {
|
||||
hsakmt_destroy_counter_props();
|
||||
hsakmt_destroy_device_debugging_memory();
|
||||
hsakmt_destroy_counter_props(&hsakmt_primary_kfd_ctx);
|
||||
hsakmt_destroy_device_debugging_memory(&hsakmt_primary_kfd_ctx);
|
||||
hsakmt_fmm_clear_all_aperture(&hsakmt_primary_kfd_ctx);
|
||||
}
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingQueryCapabilities(HSAuint32 NodeId, void
|
||||
CHECK_KFD_OPEN();
|
||||
CHECK_KFD_MINOR_VERSION(16);
|
||||
|
||||
HSAKMT_STATUS ret = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
HSAKMT_STATUS ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
|
||||
return ret;
|
||||
@@ -99,7 +99,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingCreate(HSAuint32 NodeId, HsaPcSamplingIn
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
*traceId = INVALID_TRACE_ID;
|
||||
HSAKMT_STATUS ret = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
HSAKMT_STATUS ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
|
||||
return ret;
|
||||
@@ -139,7 +139,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingDestroy(HSAuint32 NodeId, HsaPcSamplingT
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
HSAKMT_STATUS ret = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
HSAKMT_STATUS ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
|
||||
return ret;
|
||||
@@ -171,7 +171,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStart(HSAuint32 NodeId, HsaPcSamplingTra
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
HSAKMT_STATUS ret = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
HSAKMT_STATUS ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
|
||||
return ret;
|
||||
@@ -210,7 +210,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStop(HSAuint32 NodeId, HsaPcSamplingTrac
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
HSAKMT_STATUS ret = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
HSAKMT_STATUS ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, NodeId, &gpu_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
|
||||
return ret;
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <semaphore.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define BITS_PER_BYTE CHAR_BIT
|
||||
|
||||
@@ -75,8 +76,32 @@ struct perf_counts_values {
|
||||
};
|
||||
};
|
||||
|
||||
static HsaCounterProperties **counter_props;
|
||||
static unsigned int counter_props_count;
|
||||
struct hsa_kfd_perf_context
|
||||
{
|
||||
HsaCounterProperties **counter_props;
|
||||
unsigned int counter_props_count;
|
||||
};
|
||||
|
||||
/*
 * Return the perf sub-context attached to a KFD context, allocating it
 * (zero-initialized) on first use.
 *
 * Returns NULL when ctx is NULL or when the allocation fails.
 */
struct hsa_kfd_perf_context *hsakmt_kfdcontext_get_perf_context(HsaKFDContext *ctx)
{
	assert(ctx);
	if (!ctx) {
		/* Only reachable when assertions are compiled out (NDEBUG) */
		pr_err("Expected a non-null ptr for HsaKFDContext\n");
		return NULL;
	}

	if (!ctx->perf_context) {
		ctx->perf_context = calloc(1, sizeof(struct hsa_kfd_perf_context));
		if (!ctx->perf_context)
			pr_err("Alloc memory failed for struct hsa_kfd_perf_context size %zu\n",
				sizeof(struct hsa_kfd_perf_context));
	}

	/* Still NULL here if the allocation above failed */
	return ctx->perf_context;
}
|
||||
|
||||
static ssize_t readn(int fd, void *buf, size_t n)
|
||||
{
|
||||
@@ -99,33 +124,35 @@ static ssize_t readn(int fd, void *buf, size_t n)
|
||||
return n;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS hsakmt_init_counter_props(unsigned int NumNodes)
|
||||
HSAKMT_STATUS hsakmt_init_counter_props(HsaKFDContext *ctx, unsigned int NumNodes)
|
||||
{
|
||||
counter_props = calloc(NumNodes, sizeof(struct HsaCounterProperties *));
|
||||
if (!counter_props) {
|
||||
struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);
|
||||
perf_ctx->counter_props = calloc(NumNodes, sizeof(struct HsaCounterProperties *));
|
||||
if (!perf_ctx->counter_props) {
|
||||
pr_warn("Profiling is not available.\n");
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
}
|
||||
|
||||
counter_props_count = NumNodes;
|
||||
perf_ctx->counter_props_count = NumNodes;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void hsakmt_destroy_counter_props(void)
|
||||
void hsakmt_destroy_counter_props(HsaKFDContext *ctx)
|
||||
{
|
||||
unsigned int i;
|
||||
struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);
|
||||
|
||||
if (!counter_props)
|
||||
if (!perf_ctx->counter_props)
|
||||
return;
|
||||
|
||||
for (i = 0; i < counter_props_count; i++)
|
||||
if (counter_props[i]) {
|
||||
free(counter_props[i]);
|
||||
counter_props[i] = NULL;
|
||||
for (i = 0; i < perf_ctx->counter_props_count; i++)
|
||||
if (perf_ctx->counter_props[i]) {
|
||||
free(perf_ctx->counter_props[i]);
|
||||
perf_ctx->counter_props[i] = NULL;
|
||||
}
|
||||
|
||||
free(counter_props);
|
||||
free(perf_ctx->counter_props);
|
||||
}
|
||||
|
||||
static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
|
||||
@@ -211,11 +238,12 @@ static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
|
||||
return rc;
|
||||
}
|
||||
|
||||
static HSAuint32 get_block_concurrent_limit(uint32_t node_id,
|
||||
static HSAuint32 get_block_concurrent_limit(struct hsa_kfd_perf_context *perf_ctx,
|
||||
uint32_t node_id,
|
||||
HSAuint32 block_id)
|
||||
{
|
||||
uint32_t i;
|
||||
HsaCounterBlockProperties *block = &counter_props[node_id]->Blocks[0];
|
||||
HsaCounterBlockProperties *block = &perf_ctx->counter_props[node_id]->Blocks[0];
|
||||
|
||||
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
|
||||
if (block->Counters[0].BlockIndex == block_id)
|
||||
@@ -254,7 +282,8 @@ static HSAKMT_STATUS query_trace(int fd, uint64_t *buf)
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterPropertiesCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HsaCounterProperties **CounterProperties)
|
||||
{
|
||||
HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
|
||||
@@ -265,23 +294,24 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
|
||||
struct perf_counter_block block = {0};
|
||||
uint32_t total_blocks = 0;
|
||||
HsaCounterBlockProperties *block_prop;
|
||||
struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);
|
||||
|
||||
if (!counter_props)
|
||||
if (!perf_ctx->counter_props)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
if (!CounterProperties)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
if (hsakmt_validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
|
||||
if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
|
||||
if (counter_props[NodeId]) {
|
||||
*CounterProperties = counter_props[NodeId];
|
||||
if (perf_ctx->counter_props[NodeId]) {
|
||||
*CounterProperties = perf_ctx->counter_props[NodeId];
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
|
||||
rc = hsakmt_get_block_properties(NodeId, i, &block);
|
||||
rc = hsakmt_get_block_properties(ctx, NodeId, i, &block);
|
||||
if (rc != HSAKMT_STATUS_SUCCESS)
|
||||
return rc;
|
||||
total_concurrent += block.num_of_slots;
|
||||
@@ -295,19 +325,19 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
|
||||
sizeof(HsaCounterBlockProperties) * (total_blocks - 1) +
|
||||
sizeof(HsaCounter) * (total_counters - total_blocks);
|
||||
|
||||
counter_props[NodeId] = malloc(counter_props_size);
|
||||
if (!counter_props[NodeId])
|
||||
perf_ctx->counter_props[NodeId] = malloc(counter_props_size);
|
||||
if (!perf_ctx->counter_props[NodeId])
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
counter_props[NodeId]->NumBlocks = total_blocks;
|
||||
counter_props[NodeId]->NumConcurrent = total_concurrent;
|
||||
perf_ctx->counter_props[NodeId]->NumBlocks = total_blocks;
|
||||
perf_ctx->counter_props[NodeId]->NumConcurrent = total_concurrent;
|
||||
|
||||
block_prop = &counter_props[NodeId]->Blocks[0];
|
||||
block_prop = &perf_ctx->counter_props[NodeId]->Blocks[0];
|
||||
for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
|
||||
rc = hsakmt_get_block_properties(NodeId, block_id, &block);
|
||||
rc = hsakmt_get_block_properties(ctx, NodeId, block_id, &block);
|
||||
if (rc != HSAKMT_STATUS_SUCCESS) {
|
||||
free(counter_props[NodeId]);
|
||||
counter_props[NodeId] = NULL;
|
||||
free(perf_ctx->counter_props[NodeId]);
|
||||
perf_ctx->counter_props[NodeId] = NULL;
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -329,13 +359,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
|
||||
block_prop = (HsaCounterBlockProperties *)&block_prop->Counters[block_prop->NumCounters];
|
||||
}
|
||||
|
||||
*CounterProperties = counter_props[NodeId];
|
||||
*CounterProperties = perf_ctx->counter_props[NodeId];
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/* Registers a set of (HW) counters to be used for tracing/profiling */
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTraceCtx(HsaKFDContext* ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSAuint32 NumberOfCounters,
|
||||
HsaCounter *Counters,
|
||||
HsaPmcTraceRoot *TraceRoot)
|
||||
@@ -353,6 +384,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
|
||||
uint32_t block, num_blocks = 0, total_counters = 0;
|
||||
uint64_t *counter_id_ptr;
|
||||
int *fd_ptr;
|
||||
struct hsa_kfd_perf_context *perf_ctx = hsakmt_kfdcontext_get_perf_context(ctx);
|
||||
|
||||
pr_debug("[%s] Number of counters %d\n", __func__, NumberOfCounters);
|
||||
|
||||
@@ -362,7 +394,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
}
|
||||
|
||||
if (!counter_props) {
|
||||
if (!perf_ctx->counter_props) {
|
||||
pr_err("Profiling is not available, counter_props is NULL.\n");
|
||||
goto no_memory_exit;
|
||||
}
|
||||
@@ -370,7 +402,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
|
||||
if (!Counters || !TraceRoot || NumberOfCounters == 0)
|
||||
goto invalid_parameter_exit;
|
||||
|
||||
if (hsakmt_validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS) {
|
||||
if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS) {
|
||||
free(counter_id);
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
}
|
||||
@@ -408,7 +440,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
|
||||
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
|
||||
if (!num_counters[i])
|
||||
continue;
|
||||
concurrent_limit = get_block_concurrent_limit(NodeId, i);
|
||||
concurrent_limit = get_block_concurrent_limit(perf_ctx, NodeId, i);
|
||||
if (!concurrent_limit) {
|
||||
pr_err("Invalid block ID: %d\n", i);
|
||||
goto invalid_parameter_exit;
|
||||
@@ -509,7 +541,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
|
||||
|
||||
/* Unregisters a set of (HW) counters used for tracing/profiling */
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTraceCtx(HsaKFDContext* ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSATraceId TraceId)
|
||||
{
|
||||
uint32_t gpu_id;
|
||||
@@ -520,7 +553,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
|
||||
if (TraceId == 0)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
if (hsakmt_validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
|
||||
if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
|
||||
trace = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
|
||||
@@ -544,7 +577,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccessCtx(HsaKFDContext* ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSATraceId TraceId)
|
||||
{
|
||||
struct perf_trace *trace;
|
||||
@@ -561,7 +595,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
|
||||
if (trace->magic4cc != HSA_PERF_MAGIC4CC)
|
||||
return HSAKMT_STATUS_INVALID_HANDLE;
|
||||
|
||||
if (hsakmt_validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
|
||||
if (hsakmt_validate_nodeid(ctx, NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
|
||||
return ret;
|
||||
@@ -692,3 +726,32 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStopTrace(HSATraceId TraceId)
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Legacy entry point: forwards to the Ctx variant with the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
						       HsaCounterProperties **CounterProperties)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtPmcGetCounterPropertiesCtx(primary, NodeId, CounterProperties);
}
|
||||
|
||||
/* Legacy entry point: forwards to the Ctx variant with the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
						HSAuint32 NumberOfCounters,
						HsaCounter *Counters,
						HsaPmcTraceRoot *TraceRoot)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtPmcRegisterTraceCtx(primary, NodeId, NumberOfCounters,
					 Counters, TraceRoot);
}
|
||||
|
||||
/* Legacy entry point: forwards to the Ctx variant with the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
						  HSATraceId TraceId)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtPmcUnregisterTraceCtx(primary, NodeId, TraceId);
}
|
||||
|
||||
/* Legacy entry point: forwards to the Ctx variant with the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
						     HSATraceId TraceId)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtPmcAcquireTraceAccessCtx(primary, NodeId, TraceId);
}
|
||||
@@ -1958,12 +1958,13 @@ static struct perf_counter_block navi_blocks[PERFCOUNTER_BLOCKID__MAX] = {
|
||||
},
|
||||
};
|
||||
|
||||
HSAKMT_STATUS hsakmt_get_block_properties(uint32_t node_id,
|
||||
HSAKMT_STATUS hsakmt_get_block_properties(HsaKFDContext *ctx,
|
||||
uint32_t node_id,
|
||||
enum perf_block_id block_id,
|
||||
struct perf_counter_block *block)
|
||||
{
|
||||
uint32_t gfxv = hsakmt_get_gfxv_by_node_id(node_id);
|
||||
uint16_t dev_id = hsakmt_get_device_id_by_node_id(node_id);
|
||||
uint32_t gfxv = hsakmt_get_gfxv_by_node_id(ctx, node_id);
|
||||
uint16_t dev_id = hsakmt_get_device_id_by_node_id(ctx, node_id);
|
||||
|
||||
if (block_id >= PERFCOUNTER_BLOCKID__MAX ||
|
||||
block_id < PERFCOUNTER_BLOCKID__FIRST)
|
||||
|
||||
@@ -67,7 +67,8 @@ struct perf_counter_block {
|
||||
uint64_t counter_mask;
|
||||
};
|
||||
|
||||
HSAKMT_STATUS hsakmt_get_block_properties(uint32_t node_id,
|
||||
HSAKMT_STATUS hsakmt_get_block_properties(HsaKFDContext *ctx,
|
||||
uint32_t node_id,
|
||||
enum perf_block_id block_id,
|
||||
struct perf_counter_block *block);
|
||||
|
||||
|
||||
@@ -148,14 +148,15 @@ HSAKMT_STATUS hsakmt_init_process_doorbells(HsaKFDContext *ctx, unsigned int Num
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void get_doorbell_map_info(uint32_t node_id,
|
||||
static void get_doorbell_map_info(HsaKFDContext *ctx,
|
||||
uint32_t node_id,
|
||||
struct process_doorbells *doorbell)
|
||||
{
|
||||
/*
|
||||
* GPUVM doorbell on Tonga requires a workaround for VM TLB ACTIVE bit
|
||||
* lookup bug. Remove ASIC check when this is implemented in amdgpu.
|
||||
*/
|
||||
uint32_t gfxv = hsakmt_get_gfxv_by_node_id(node_id);
|
||||
uint32_t gfxv = hsakmt_get_gfxv_by_node_id(ctx, node_id);
|
||||
doorbell->use_gpuvm = (hsakmt_is_dgpu && gfxv != GFX_VERSION_TONGA);
|
||||
doorbell->size = DOORBELLS_PAGE_SIZE(DOORBELL_SIZE(gfxv));
|
||||
|
||||
@@ -272,7 +273,7 @@ static HSAKMT_STATUS map_doorbell(HsaKFDContext *ctx,
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
get_doorbell_map_info(NodeId, &doorbells[NodeId]);
|
||||
get_doorbell_map_info(ctx, NodeId, &doorbells[NodeId]);
|
||||
|
||||
if (doorbells[NodeId].use_gpuvm) {
|
||||
status = map_doorbell_dgpu(ctx, NodeId, gpu_id, doorbell_mmap_offset);
|
||||
@@ -385,7 +386,7 @@ void *hsakmt_allocate_exec_aligned_memory_gpu(HsaKFDContext *ctx,
|
||||
* nonPaged=0 system memory allocation uses GTT path
|
||||
*/
|
||||
if (!nonPaged) {
|
||||
cpu_id = hsakmt_get_direct_link_cpu(NodeId);
|
||||
cpu_id = hsakmt_get_direct_link_cpu(ctx, NodeId);
|
||||
if (cpu_id == INVALID_NODEID) {
|
||||
flags.ui32.NoNUMABind = 1;
|
||||
cpu_id = 0;
|
||||
@@ -460,7 +461,8 @@ static void free_exec_aligned_memory(HsaKFDContext *ctx,
|
||||
munmap(addr, size);
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS register_svm_range(void *mem, uint32_t size,
|
||||
static HSAKMT_STATUS register_svm_range(HsaKFDContext *ctx,
|
||||
void *mem, uint32_t size,
|
||||
uint32_t gpuNode, uint32_t prefetchNode,
|
||||
uint32_t preferredNode, bool alwaysMapped)
|
||||
{
|
||||
@@ -493,7 +495,7 @@ static HSAKMT_STATUS register_svm_range(void *mem, uint32_t size,
|
||||
attrs[5].type = HSA_SVM_ATTR_GRANULARITY;
|
||||
attrs[5].value = 0xFF;
|
||||
|
||||
return hsaKmtSVMSetAttr(mem, size, nattr, attrs);
|
||||
return hsaKmtSVMSetAttrCtx(ctx, mem, size, nattr, attrs);
|
||||
}
|
||||
|
||||
static void free_queue(HsaKFDContext *ctx, struct queue *q)
|
||||
@@ -599,7 +601,7 @@ static int handle_concrete_asic(HsaKFDContext *ctx,
|
||||
|
||||
fill_cwsr_header(q, addr, Event, ErrPayload, node.NumXcc);
|
||||
|
||||
HSAKMT_STATUS r = register_svm_range(addr, size,
|
||||
HSAKMT_STATUS r = register_svm_range(ctx, addr, size,
|
||||
NodeId, NodeId, 0, true);
|
||||
|
||||
if (r == HSAKMT_STATUS_SUCCESS) {
|
||||
@@ -680,7 +682,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExtCtx(HsaKFDContext *ctx,
|
||||
Priority > HSA_QUEUE_PRIORITY_MAXIMUM)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
result = hsakmt_validate_nodeid(ctx, NodeId, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
@@ -691,7 +693,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExtCtx(HsaKFDContext *ctx,
|
||||
|
||||
memset(q, 0, sizeof(*q));
|
||||
|
||||
q->gfxv = hsakmt_get_gfxv_by_node_id(NodeId);
|
||||
q->gfxv = hsakmt_get_gfxv_by_node_id(ctx, NodeId);
|
||||
q->use_ats = false;
|
||||
|
||||
if (q->gfxv == GFX_VERSION_TONGA)
|
||||
@@ -932,7 +934,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandlerCtx(HsaKFDContext *ctx,
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
result = hsakmt_validate_nodeid(Node, &gpu_id);
|
||||
result = hsakmt_validate_nodeid(ctx, Node, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMAcquire(HSAuint32 PreferredNode)
|
||||
struct kfd_ioctl_spm_args args = {0};
|
||||
uint32_t gpu_id;
|
||||
|
||||
ret = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
|
||||
ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, PreferredNode, &gpu_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_err("[%s] invalid node ID: %d\n", __func__, PreferredNode);
|
||||
return ret;
|
||||
@@ -61,7 +61,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMSetDestBuffer(HSAuint32 PreferredNode,
|
||||
struct kfd_ioctl_spm_args args = {0};
|
||||
uint32_t gpu_id = 0;
|
||||
|
||||
ret = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
|
||||
ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, PreferredNode, &gpu_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
@@ -87,7 +87,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMRelease(HSAuint32 PreferredNode)
|
||||
struct kfd_ioctl_spm_args args = {0};
|
||||
uint32_t gpu_id;
|
||||
|
||||
ret = hsakmt_validate_nodeid(PreferredNode, &gpu_id);
|
||||
ret = hsakmt_validate_nodeid(&hsakmt_primary_kfd_ctx, PreferredNode, &gpu_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_err("[%s] invalid node ID: %d\n", __func__, PreferredNode);
|
||||
return ret;
|
||||
|
||||
@@ -81,7 +81,7 @@ hsaKmtSVMSetAttrCtx(HsaKFDContext *ctx,
|
||||
continue;
|
||||
}
|
||||
|
||||
r = hsakmt_validate_nodeid(attrs[i].value, &args->attrs[i].value);
|
||||
r = hsakmt_validate_nodeid(ctx, attrs[i].value, &args->attrs[i].value);
|
||||
if (r != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_debug("invalid node ID: %d\n", attrs[i].value);
|
||||
return r;
|
||||
@@ -141,7 +141,7 @@ hsaKmtSVMGetAttrCtx(HsaKFDContext *ctx,
|
||||
attrs[i].type != KFD_IOCTL_SVM_ATTR_NO_ACCESS)
|
||||
continue;
|
||||
|
||||
r = hsakmt_validate_nodeid(attrs[i].value, &args->attrs[i].value);
|
||||
r = hsakmt_validate_nodeid(ctx, attrs[i].value, &args->attrs[i].value);
|
||||
if (r != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_debug("invalid node ID: %d\n", attrs[i].value);
|
||||
return r;
|
||||
@@ -176,7 +176,7 @@ hsaKmtSVMGetAttrCtx(HsaKFDContext *ctx,
|
||||
attrs[i].value = INVALID_NODEID;
|
||||
break;
|
||||
default:
|
||||
r = hsakmt_gpuid_to_nodeid(attrs[i].value, &attrs[i].value);
|
||||
r = hsakmt_gpuid_to_nodeid(ctx, attrs[i].value, &attrs[i].value);
|
||||
if (r != HSAKMT_STATUS_SUCCESS) {
|
||||
pr_debug("invalid GPU ID: %d\n",
|
||||
attrs[i].value);
|
||||
|
||||
@@ -26,7 +26,8 @@
|
||||
#include "libhsakmt.h"
|
||||
#include "hsakmt/linux/kfd_ioctl.h"
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCountersCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HsaClockCounters *Counters)
|
||||
{
|
||||
HSAKMT_STATUS result;
|
||||
@@ -36,13 +37,13 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
result = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
result = hsakmt_validate_nodeid(ctx, NodeId, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
args.gpu_id = gpu_id;
|
||||
|
||||
err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
|
||||
err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
|
||||
if (err < 0) {
|
||||
result = HSAKMT_STATUS_ERROR;
|
||||
} else {
|
||||
@@ -55,3 +56,9 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Legacy entry point: forwards to the Ctx variant with the primary KFD context. */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
						HsaClockCounters *Counters)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtGetClockCountersCtx(primary, NodeId, Counters);
}
|
||||
|
||||
@@ -72,16 +72,6 @@ typedef struct {
|
||||
HsaIoLinkProperties *link;
|
||||
} node_props_t;
|
||||
|
||||
static HsaSystemProperties *g_system;
|
||||
static node_props_t *g_props;
|
||||
|
||||
/* This array caches sysfs based node IDs of CPU nodes + all supported GPU nodes.
|
||||
* It will be used to map user-node IDs to sysfs-node IDs.
|
||||
*/
|
||||
static uint32_t *map_user_to_sysfs_node_id;
|
||||
static uint32_t map_user_to_sysfs_node_id_size;
|
||||
static uint32_t num_sysfs_nodes;
|
||||
|
||||
static int processor_vendor = -1;
|
||||
/* Supported System Vendors */
|
||||
enum SUPPORTED_PROCESSOR_VENDORS {
|
||||
@@ -96,8 +86,45 @@ static const char *supported_processor_vendor_name[] = {
|
||||
"\n" // POWER requires a different search method
|
||||
};
|
||||
|
||||
/*
|
||||
* KFD Topology Context
|
||||
*/
|
||||
struct hsa_kfd_topology_context
|
||||
{
|
||||
HsaSystemProperties* system_props;
|
||||
node_props_t *node_props;
|
||||
|
||||
/* This array caches sysfs based node IDs of CPU nodes + all supported GPU nodes.
|
||||
* It will be used to map user-node IDs to sysfs-node IDs.
|
||||
*/
|
||||
uint32_t *map_user_to_sysfs_node_id;
|
||||
uint32_t map_user_to_sysfs_node_id_size;
|
||||
|
||||
uint32_t num_sysfs_nodes;
|
||||
};
|
||||
|
||||
/*
 * Return the topology sub-context of @ctx, creating it on first use.
 *
 * The sub-context is allocated zero-initialised with calloc() and cached in
 * ctx->topology_context, so later calls return the same pointer.
 *
 * Returns NULL when @ctx is NULL or when the allocation fails; callers must
 * check the result before dereferencing it.
 */
struct hsa_kfd_topology_context *hsakmt_kfdcontext_get_topology_context(HsaKFDContext *ctx)
{
	assert(ctx);
	/* Keep the run-time check as well: assert() compiles away under NDEBUG. */
	if (!ctx) {
		/* Terminate the message with '\n' like the other log lines here. */
		pr_err("Expected a non-null ptr for HsaKFDContext\n");
		return NULL;
	}

	if (!ctx->topology_context) {
		ctx->topology_context = calloc(1, sizeof(struct hsa_kfd_topology_context));
		if (!ctx->topology_context)
			pr_err("Alloc memory failed for struct hsa_kfd_topology_context size %zu\n",
			       sizeof(struct hsa_kfd_topology_context));
	}

	/* Either the cached/new sub-context, or NULL after a failed allocation. */
	return ctx->topology_context;
}
|
||||
|
||||
static HSAKMT_STATUS topology_take_snapshot(HsaKFDContext *ctx);
|
||||
static void topology_drop_snapshot(void);
|
||||
static void topology_drop_snapshot(HsaKFDContext *ctx);
|
||||
|
||||
static const struct hsa_gfxip_table gfxip_lookup_table[] = {
|
||||
/* Kaveri Family */
|
||||
@@ -610,12 +637,15 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS topology_sysfs_map_node_id(uint32_t node_id, uint32_t *sys_node_id)
|
||||
static HSAKMT_STATUS topology_sysfs_map_node_id(
|
||||
struct hsa_kfd_topology_context *topology_ctx,
|
||||
uint32_t node_id, uint32_t *sys_node_id)
|
||||
{
|
||||
if ((!map_user_to_sysfs_node_id) || (node_id >= map_user_to_sysfs_node_id_size))
|
||||
if ((!topology_ctx->map_user_to_sysfs_node_id) ||
|
||||
(node_id >= topology_ctx->map_user_to_sysfs_node_id_size))
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
|
||||
*sys_node_id = map_user_to_sysfs_node_id[node_id];
|
||||
*sys_node_id = topology_ctx->map_user_to_sysfs_node_id[node_id];
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -737,6 +767,7 @@ HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx,
|
||||
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
|
||||
bool is_node_supported = true;
|
||||
uint32_t num_supported_nodes = 0;
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
assert(props);
|
||||
snprintf(path, sizeof(path), KFD_SYSFS_PATH_SYSTEM_PROPERTIES, get_topology_dir());
|
||||
@@ -779,34 +810,34 @@ HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx,
|
||||
* which represent the node numbers
|
||||
*/
|
||||
snprintf(path, sizeof(path), KFD_SYSFS_PATH_NODES, get_topology_dir());
|
||||
num_sysfs_nodes = num_subdirs(path, "");
|
||||
topology_ctx->num_sysfs_nodes = num_subdirs(path, "");
|
||||
|
||||
if (map_user_to_sysfs_node_id == NULL) {
|
||||
if (topology_ctx->map_user_to_sysfs_node_id == NULL) {
|
||||
/* Trade off - num_sysfs_nodes includes all CPU and GPU nodes.
|
||||
* Slightly more memory is allocated than necessary.
|
||||
*/
|
||||
map_user_to_sysfs_node_id = calloc(num_sysfs_nodes, sizeof(uint32_t));
|
||||
if (map_user_to_sysfs_node_id == NULL) {
|
||||
topology_ctx->map_user_to_sysfs_node_id = calloc(topology_ctx->num_sysfs_nodes, sizeof(uint32_t));
|
||||
if (topology_ctx->map_user_to_sysfs_node_id == NULL) {
|
||||
ret = HSAKMT_STATUS_NO_MEMORY;
|
||||
goto err2;
|
||||
}
|
||||
map_user_to_sysfs_node_id_size = num_sysfs_nodes;
|
||||
} else if (num_sysfs_nodes > map_user_to_sysfs_node_id_size) {
|
||||
free(map_user_to_sysfs_node_id);
|
||||
map_user_to_sysfs_node_id = calloc(num_sysfs_nodes, sizeof(uint32_t));
|
||||
if (map_user_to_sysfs_node_id == NULL) {
|
||||
topology_ctx->map_user_to_sysfs_node_id_size = topology_ctx->num_sysfs_nodes;
|
||||
} else if (topology_ctx->num_sysfs_nodes > topology_ctx->map_user_to_sysfs_node_id_size) {
|
||||
free(topology_ctx->map_user_to_sysfs_node_id);
|
||||
topology_ctx->map_user_to_sysfs_node_id = calloc(topology_ctx->num_sysfs_nodes, sizeof(uint32_t));
|
||||
if (topology_ctx->map_user_to_sysfs_node_id == NULL) {
|
||||
ret = HSAKMT_STATUS_NO_MEMORY;
|
||||
goto err2;
|
||||
}
|
||||
map_user_to_sysfs_node_id_size = num_sysfs_nodes;
|
||||
topology_ctx->map_user_to_sysfs_node_id_size = topology_ctx->num_sysfs_nodes;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < num_sysfs_nodes; i++) {
|
||||
for (uint32_t i = 0; i < topology_ctx->num_sysfs_nodes; i++) {
|
||||
ret = topology_sysfs_check_node_supported(ctx, i, &is_node_supported);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
goto sysfs_parse_failed;
|
||||
if (is_node_supported)
|
||||
map_user_to_sysfs_node_id[num_supported_nodes++] = i;
|
||||
topology_ctx->map_user_to_sysfs_node_id[num_supported_nodes++] = i;
|
||||
}
|
||||
props->NumNodes = num_supported_nodes;
|
||||
|
||||
@@ -815,8 +846,8 @@ HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx,
|
||||
return ret;
|
||||
|
||||
sysfs_parse_failed:
|
||||
free(map_user_to_sysfs_node_id);
|
||||
map_user_to_sysfs_node_id = NULL;
|
||||
free(topology_ctx->map_user_to_sysfs_node_id);
|
||||
topology_ctx->map_user_to_sysfs_node_id = NULL;
|
||||
err2:
|
||||
free(read_buf);
|
||||
err1:
|
||||
@@ -1077,7 +1108,8 @@ err_device_initialize:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id,
|
||||
static HSAKMT_STATUS topology_sysfs_get_node_props(HsaKFDContext *ctx,
|
||||
uint32_t node_id,
|
||||
HsaNodeProperties *props,
|
||||
bool *p2p_links,
|
||||
uint32_t *num_p2pLinks)
|
||||
@@ -1097,9 +1129,9 @@ static HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id,
|
||||
uint32_t simd_arrays_count = 0;
|
||||
|
||||
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
|
||||
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
assert(props);
|
||||
ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
|
||||
ret = topology_sysfs_map_node_id(topology_ctx, node_id, &sys_node_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
return ret;
|
||||
|
||||
@@ -1307,7 +1339,9 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id,
|
||||
static HSAKMT_STATUS topology_sysfs_get_mem_props(
|
||||
struct hsa_kfd_topology_context *topology_ctx,
|
||||
uint32_t node_id,
|
||||
uint32_t mem_id,
|
||||
HsaMemoryProperties *props)
|
||||
{
|
||||
@@ -1322,7 +1356,7 @@ static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id,
|
||||
uint32_t sys_node_id;
|
||||
|
||||
assert(props);
|
||||
ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
|
||||
ret = topology_sysfs_map_node_id(topology_ctx, node_id, &sys_node_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
return ret;
|
||||
|
||||
@@ -1541,7 +1575,9 @@ exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS topology_sysfs_get_cache_props(uint32_t node_id,
|
||||
static HSAKMT_STATUS topology_sysfs_get_cache_props(
|
||||
struct hsa_kfd_topology_context *topology_ctx,
|
||||
uint32_t node_id,
|
||||
uint32_t cache_id,
|
||||
HsaCacheProperties *props)
|
||||
{
|
||||
@@ -1556,7 +1592,7 @@ static HSAKMT_STATUS topology_sysfs_get_cache_props(uint32_t node_id,
|
||||
uint32_t sys_node_id;
|
||||
|
||||
assert(props);
|
||||
ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
|
||||
ret = topology_sysfs_map_node_id(topology_ctx, node_id, &sys_node_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
return ret;
|
||||
|
||||
@@ -1619,12 +1655,13 @@ err1:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS topology_map_sysfs_to_user_node_id(uint32_t sys_node_id, uint32_t *user_node_id)
|
||||
static HSAKMT_STATUS topology_map_sysfs_to_user_node_id(struct hsa_kfd_topology_context *topology_ctx,
|
||||
uint32_t sys_node_id, uint32_t *user_node_id)
|
||||
{
|
||||
uint32_t node_id;
|
||||
|
||||
for (node_id = 0; node_id < map_user_to_sysfs_node_id_size; node_id++)
|
||||
if (map_user_to_sysfs_node_id[node_id] == sys_node_id) {
|
||||
for (node_id = 0; node_id < topology_ctx->map_user_to_sysfs_node_id_size; node_id++)
|
||||
if (topology_ctx->map_user_to_sysfs_node_id[node_id] == sys_node_id) {
|
||||
*user_node_id = node_id;
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -1652,9 +1689,10 @@ static HSAKMT_STATUS topology_sysfs_get_iolink_props(HsaKFDContext *ctx,
|
||||
int read_size;
|
||||
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
|
||||
uint32_t sys_node_id;
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
assert(props);
|
||||
ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
|
||||
ret = topology_sysfs_map_node_id(topology_ctx, node_id, &sys_node_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
return ret;
|
||||
|
||||
@@ -1707,7 +1745,7 @@ static HSAKMT_STATUS topology_sysfs_get_iolink_props(HsaKFDContext *ctx,
|
||||
memset(props, 0, sizeof(*props));
|
||||
goto err2;
|
||||
}
|
||||
ret = topology_map_sysfs_to_user_node_id(sysfs_node_id, &props->NodeTo);
|
||||
ret = topology_map_sysfs_to_user_node_id(topology_ctx, sysfs_node_id, &props->NodeTo);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
goto err2;
|
||||
} else if (strcmp(prop_name, "weight") == 0)
|
||||
@@ -1974,6 +2012,7 @@ HSAKMT_STATUS topology_take_snapshot(HsaKFDContext *ctx)
|
||||
uint32_t num_ioLinks;
|
||||
bool p2p_links = false;
|
||||
uint32_t num_p2pLinks = 0;
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
cpuinfo = calloc(num_procs, sizeof(struct proc_cpuinfo));
|
||||
if (!cpuinfo) {
|
||||
@@ -1996,7 +2035,7 @@ retry:
|
||||
goto err;
|
||||
}
|
||||
for (i = 0; i < sys_props.NumNodes; i++) {
|
||||
ret = topology_sysfs_get_node_props(i,
|
||||
ret = topology_sysfs_get_node_props(ctx, i,
|
||||
&temp_props[i].node,
|
||||
&p2p_links, &num_p2pLinks);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
@@ -2016,7 +2055,7 @@ retry:
|
||||
goto err;
|
||||
}
|
||||
for (mem_id = 0; mem_id < temp_props[i].node.NumMemoryBanks; mem_id++) {
|
||||
ret = topology_sysfs_get_mem_props(i, mem_id, &temp_props[i].mem[mem_id]);
|
||||
ret = topology_sysfs_get_mem_props(topology_ctx, i, mem_id, &temp_props[i].mem[mem_id]);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
free_properties(temp_props, i + 1);
|
||||
goto err;
|
||||
@@ -2032,7 +2071,8 @@ retry:
|
||||
goto err;
|
||||
}
|
||||
for (cache_id = 0; cache_id < temp_props[i].node.NumCaches; cache_id++) {
|
||||
ret = topology_sysfs_get_cache_props(i, cache_id, &temp_props[i].cache[cache_id]);
|
||||
ret = topology_sysfs_get_cache_props(topology_ctx,
|
||||
i, cache_id, &temp_props[i].cache[cache_id]);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
free_properties(temp_props, i + 1);
|
||||
goto err;
|
||||
@@ -2122,62 +2162,72 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (!g_system) {
|
||||
g_system = malloc(sizeof(HsaSystemProperties));
|
||||
if (!g_system) {
|
||||
if (!topology_ctx->system_props) {
|
||||
topology_ctx->system_props = malloc(sizeof(HsaSystemProperties));
|
||||
if (!topology_ctx->system_props) {
|
||||
free_properties(temp_props, sys_props.NumNodes);
|
||||
ret = HSAKMT_STATUS_NO_MEMORY;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
*g_system = sys_props;
|
||||
if (g_props)
|
||||
free(g_props);
|
||||
g_props = temp_props;
|
||||
*topology_ctx->system_props = sys_props;
|
||||
if (topology_ctx->node_props)
|
||||
free(topology_ctx->node_props);
|
||||
topology_ctx->node_props = temp_props;
|
||||
err:
|
||||
free(cpuinfo);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Drop the Snapshot of the HSA topology information. Assume lock is held. */
|
||||
void topology_drop_snapshot(void)
|
||||
void topology_drop_snapshot(HsaKFDContext *ctx)
|
||||
{
|
||||
if (!!g_system != !!g_props)
|
||||
struct hsa_kfd_topology_context *topology_ctx =
|
||||
hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
if (!!topology_ctx->system_props != !!topology_ctx->node_props)
|
||||
pr_warn("Probably inconsistency?\n");
|
||||
|
||||
if (g_props) {
|
||||
if (topology_ctx->node_props) {
|
||||
/* Remove state */
|
||||
free_properties(g_props, g_system->NumNodes);
|
||||
g_props = NULL;
|
||||
free_properties(topology_ctx->node_props, topology_ctx->system_props->NumNodes);
|
||||
topology_ctx->node_props = NULL;
|
||||
}
|
||||
|
||||
free(g_system);
|
||||
g_system = NULL;
|
||||
free(topology_ctx->system_props);
|
||||
topology_ctx->system_props = NULL;
|
||||
|
||||
if (map_user_to_sysfs_node_id) {
|
||||
free(map_user_to_sysfs_node_id);
|
||||
map_user_to_sysfs_node_id = NULL;
|
||||
map_user_to_sysfs_node_id_size = 0;
|
||||
if (topology_ctx->map_user_to_sysfs_node_id) {
|
||||
free(topology_ctx->map_user_to_sysfs_node_id);
|
||||
topology_ctx->map_user_to_sysfs_node_id = NULL;
|
||||
topology_ctx->map_user_to_sysfs_node_id_size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
HSAKMT_STATUS hsakmt_validate_nodeid(uint32_t nodeid, uint32_t *gpu_id)
|
||||
HSAKMT_STATUS hsakmt_validate_nodeid(HsaKFDContext *ctx, uint32_t nodeid, uint32_t *gpu_id)
|
||||
{
|
||||
if (!g_props || !g_system || g_system->NumNodes <= nodeid)
|
||||
struct hsa_kfd_topology_context *topology_ctx =
|
||||
hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
if (!topology_ctx->node_props || !topology_ctx->system_props ||
|
||||
topology_ctx->system_props->NumNodes <= nodeid)
|
||||
return HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
if (gpu_id)
|
||||
*gpu_id = g_props[nodeid].node.KFDGpuID;
|
||||
*gpu_id = topology_ctx->node_props[nodeid].node.KFDGpuID;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS hsakmt_gpuid_to_nodeid(uint32_t gpu_id, uint32_t *node_id)
|
||||
HSAKMT_STATUS hsakmt_gpuid_to_nodeid(HsaKFDContext *ctx, uint32_t gpu_id, uint32_t *node_id)
|
||||
{
|
||||
uint64_t node_idx;
|
||||
|
||||
for (node_idx = 0; node_idx < g_system->NumNodes; node_idx++) {
|
||||
if (g_props[node_idx].node.KFDGpuID == gpu_id) {
|
||||
struct hsa_kfd_topology_context *topology_ctx =
|
||||
hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
for (node_idx = 0; node_idx < topology_ctx->system_props->NumNodes; node_idx++) {
|
||||
if (topology_ctx->node_props[node_idx].node.KFDGpuID == gpu_id) {
|
||||
*node_id = node_idx;
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -2193,6 +2243,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemPropertiesCtx(HsaKFDContext *ctx,
|
||||
HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS;
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
struct hsa_kfd_topology_context *topology_ctx =
|
||||
hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
if (!SystemProperties)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
@@ -2202,8 +2254,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemPropertiesCtx(HsaKFDContext *ctx,
|
||||
/* We already have a valid snapshot. Avoid double initialization that
|
||||
* would leak memory.
|
||||
*/
|
||||
if (g_system) {
|
||||
*SystemProperties = *g_system;
|
||||
if (topology_ctx->system_props) {
|
||||
*SystemProperties = *topology_ctx->system_props;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -2211,23 +2263,23 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemPropertiesCtx(HsaKFDContext *ctx,
|
||||
if (err != HSAKMT_STATUS_SUCCESS)
|
||||
goto out;
|
||||
|
||||
assert(g_system);
|
||||
assert(topology_ctx->system_props);
|
||||
|
||||
if (hsakmt_use_model)
|
||||
model_init();
|
||||
|
||||
err = hsakmt_fmm_init_process_apertures(ctx, g_system->NumNodes);
|
||||
err = hsakmt_fmm_init_process_apertures(ctx, topology_ctx->system_props->NumNodes);
|
||||
if (err != HSAKMT_STATUS_SUCCESS)
|
||||
goto init_process_apertures_failed;
|
||||
|
||||
err = hsakmt_init_process_doorbells(ctx, g_system->NumNodes);
|
||||
err = hsakmt_init_process_doorbells(ctx, topology_ctx->system_props->NumNodes);
|
||||
if (err != HSAKMT_STATUS_SUCCESS)
|
||||
goto init_doorbells_failed;
|
||||
|
||||
*SystemProperties = *g_system;
|
||||
*SystemProperties = *topology_ctx->system_props;
|
||||
|
||||
for (int node = 0; node < g_system->NumNodes; node++) {
|
||||
if (hsakmt_get_gfxv_by_node_id(node) == GFX_VERSION_GFX1151 &&
|
||||
for (int node = 0; node < topology_ctx->system_props->NumNodes; node++) {
|
||||
if (hsakmt_get_gfxv_by_node_id(ctx, node) == GFX_VERSION_GFX1151 &&
|
||||
hsakmt_kfd_version_info.KernelInterfaceMajorVersion == 1 &&
|
||||
hsakmt_kfd_version_info.KernelInterfaceMinorVersion < 20)
|
||||
pr_err_once("WARNING: KFD ABI 1.20+ is recommended for gfx1151. Current KFD ABI is %i.%i. This may result in faults, crashes and other application instability\n", hsakmt_kfd_version_info.KernelInterfaceMajorVersion, hsakmt_kfd_version_info.KernelInterfaceMinorVersion);
|
||||
@@ -2238,7 +2290,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemPropertiesCtx(HsaKFDContext *ctx,
|
||||
init_doorbells_failed:
|
||||
hsakmt_fmm_destroy_process_apertures(ctx);
|
||||
init_process_apertures_failed:
|
||||
topology_drop_snapshot();
|
||||
topology_drop_snapshot(ctx);
|
||||
|
||||
out:
|
||||
pthread_mutex_unlock(&hsakmt_mutex);
|
||||
@@ -2251,20 +2303,24 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemPropertiesCtx(HsaKFDContext *ctx)
|
||||
|
||||
hsakmt_destroy_process_doorbells(ctx);
|
||||
hsakmt_fmm_destroy_process_apertures(ctx);
|
||||
topology_drop_snapshot();
|
||||
topology_drop_snapshot(ctx);
|
||||
|
||||
pthread_mutex_unlock(&hsakmt_mutex);
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS hsakmt_topology_get_node_props(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS hsakmt_topology_get_node_props(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HsaNodeProperties *NodeProperties)
|
||||
{
|
||||
if (!g_system || !g_props || NodeId >= g_system->NumNodes)
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
if (!topology_ctx->system_props || !topology_ctx->node_props ||
|
||||
NodeId >= topology_ctx->system_props->NumNodes)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
*NodeProperties = g_props[NodeId].node;
|
||||
*NodeProperties = topology_ctx->node_props[NodeId].node;
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -2282,11 +2338,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodePropertiesCtx(HsaKFDContext *ctx,
|
||||
CHECK_KFD_OPEN();
|
||||
pthread_mutex_lock(&hsakmt_mutex);
|
||||
|
||||
err = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
err = hsakmt_validate_nodeid(ctx, NodeId, &gpu_id);
|
||||
if (err != HSAKMT_STATUS_SUCCESS)
|
||||
goto out;
|
||||
|
||||
err = hsakmt_topology_get_node_props(NodeId, NodeProperties);
|
||||
err = hsakmt_topology_get_node_props(ctx, NodeId, NodeProperties);
|
||||
if (err != HSAKMT_STATUS_SUCCESS)
|
||||
goto out;
|
||||
/* For CPU only node don't add any additional GPU memory banks. */
|
||||
@@ -2314,6 +2370,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
|
||||
HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS;
|
||||
uint32_t i, gpu_id;
|
||||
HSAuint64 aperture_limit;
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
node_props_t *node_props = topology_ctx->node_props;
|
||||
|
||||
if (!MemoryProperties)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
@@ -2321,15 +2379,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
|
||||
CHECK_KFD_OPEN();
|
||||
pthread_mutex_lock(&hsakmt_mutex);
|
||||
|
||||
err = hsakmt_validate_nodeid(NodeId, &gpu_id);
|
||||
err = hsakmt_validate_nodeid(ctx, NodeId, &gpu_id);
|
||||
if (err != HSAKMT_STATUS_SUCCESS)
|
||||
goto out;
|
||||
|
||||
memset(MemoryProperties, 0, NumBanks * sizeof(HsaMemoryProperties));
|
||||
|
||||
for (i = 0; i < MIN(g_props[NodeId].node.NumMemoryBanks, NumBanks); i++) {
|
||||
assert(g_props[NodeId].mem);
|
||||
MemoryProperties[i] = g_props[NodeId].mem[i];
|
||||
for (i = 0; i < MIN(node_props[NodeId].node.NumMemoryBanks, NumBanks); i++) {
|
||||
assert(node_props[NodeId].mem);
|
||||
MemoryProperties[i] = node_props[NodeId].mem[i];
|
||||
}
|
||||
|
||||
/* The following memory banks do not apply to CPU-only nodes */
|
||||
@@ -2341,7 +2399,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
|
||||
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_LDS, gpu_id,
|
||||
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
|
||||
MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_LDS;
|
||||
MemoryProperties[i].SizeInBytes = g_props[NodeId].node.LDSSizeInKB * 1024;
|
||||
MemoryProperties[i].SizeInBytes = node_props[NodeId].node.LDSSizeInKB * 1024;
|
||||
i++;
|
||||
}
|
||||
|
||||
@@ -2349,12 +2407,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
|
||||
* For dGPU the topology node contains Local Memory and it is added by
|
||||
* the for loop above
|
||||
*/
|
||||
if (hsakmt_get_gfxv_by_node_id(NodeId) == GFX_VERSION_KAVERI && i < NumBanks &&
|
||||
g_props[NodeId].node.LocalMemSize > 0 &&
|
||||
if (hsakmt_get_gfxv_by_node_id(ctx, NodeId) == GFX_VERSION_KAVERI && i < NumBanks &&
|
||||
node_props[NodeId].node.LocalMemSize > 0 &&
|
||||
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_GPUVM, gpu_id,
|
||||
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
|
||||
MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
|
||||
MemoryProperties[i].SizeInBytes = g_props[NodeId].node.LocalMemSize;
|
||||
MemoryProperties[i].SizeInBytes = node_props[NodeId].node.LocalMemSize;
|
||||
i++;
|
||||
}
|
||||
|
||||
@@ -2368,7 +2426,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
|
||||
}
|
||||
|
||||
/* Add SVM aperture */
|
||||
if (hsakmt_topology_is_svm_needed(g_props[NodeId].node.EngineId) && i < NumBanks &&
|
||||
if (hsakmt_topology_is_svm_needed(node_props[NodeId].node.EngineId) && i < NumBanks &&
|
||||
hsakmt_fmm_get_aperture_base_and_limit(ctx,
|
||||
FMM_SVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress,
|
||||
&aperture_limit) == HSAKMT_STATUS_SUCCESS) {
|
||||
@@ -2399,6 +2457,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCachePropertiesCtx(HsaKFDContext *ctx,
|
||||
{
|
||||
HSAKMT_STATUS err;
|
||||
uint32_t i;
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
if (!CacheProperties)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
@@ -2407,19 +2466,19 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCachePropertiesCtx(HsaKFDContext *ctx,
|
||||
pthread_mutex_lock(&hsakmt_mutex);
|
||||
|
||||
/* KFD ADD page 18, snapshot protocol violation */
|
||||
if (!g_system || NodeId >= g_system->NumNodes) {
|
||||
if (!topology_ctx->system_props || NodeId >= topology_ctx->system_props->NumNodes) {
|
||||
err = HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (NumCaches > g_props[NodeId].node.NumCaches) {
|
||||
if (NumCaches > topology_ctx->node_props[NodeId].node.NumCaches) {
|
||||
err = HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < MIN(g_props[NodeId].node.NumCaches, NumCaches); i++) {
|
||||
assert(g_props[NodeId].cache);
|
||||
CacheProperties[i] = g_props[NodeId].cache[i];
|
||||
for (i = 0; i < MIN(topology_ctx->node_props[NodeId].node.NumCaches, NumCaches); i++) {
|
||||
assert(topology_ctx->node_props[NodeId].cache);
|
||||
CacheProperties[i] = topology_ctx->node_props[NodeId].cache[i];
|
||||
}
|
||||
|
||||
err = HSAKMT_STATUS_SUCCESS;
|
||||
@@ -2429,14 +2488,18 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSAuint32 NumIoLinks,
|
||||
HsaIoLinkProperties *IoLinkProperties)
|
||||
{
|
||||
if (!g_system || !g_props || NodeId >= g_system->NumNodes)
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
if (!topology_ctx->system_props || !topology_ctx->node_props ||
|
||||
NodeId >= topology_ctx->system_props->NumNodes)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
memcpy(IoLinkProperties, g_props[NodeId].link,
|
||||
memcpy(IoLinkProperties, topology_ctx->node_props[NodeId].link,
|
||||
NumIoLinks * sizeof(*IoLinkProperties));
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
@@ -2448,6 +2511,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkPropertiesCtx(HsaKFDContext *ctx,
|
||||
HsaIoLinkProperties *IoLinkProperties)
|
||||
{
|
||||
HSAKMT_STATUS err;
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
if (!IoLinkProperties)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
@@ -2457,79 +2521,85 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkPropertiesCtx(HsaKFDContext *ctx,
|
||||
pthread_mutex_lock(&hsakmt_mutex);
|
||||
|
||||
/* KFD ADD page 18, snapshot protocol violation */
|
||||
if (!g_system || NodeId >= g_system->NumNodes ) {
|
||||
if (!topology_ctx->system_props || NodeId >= topology_ctx->system_props->NumNodes ) {
|
||||
err = HSAKMT_STATUS_INVALID_NODE_UNIT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (NumIoLinks > g_props[NodeId].node.NumIOLinks) {
|
||||
if (NumIoLinks > topology_ctx->node_props[NodeId].node.NumIOLinks) {
|
||||
err = HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
goto out;
|
||||
}
|
||||
|
||||
assert(g_props[NodeId].link);
|
||||
err = hsakmt_topology_get_iolink_props(NodeId, NumIoLinks, IoLinkProperties);
|
||||
assert(topology_ctx->node_props[NodeId].link);
|
||||
err = hsakmt_topology_get_iolink_props(ctx, NodeId, NumIoLinks, IoLinkProperties);
|
||||
|
||||
out:
|
||||
pthread_mutex_unlock(&hsakmt_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
uint32_t hsakmt_get_gfxv_by_node_id(HSAuint32 node_id)
|
||||
uint32_t hsakmt_get_gfxv_by_node_id(HsaKFDContext *ctx, HSAuint32 node_id)
|
||||
{
|
||||
return HSA_GET_GFX_VERSION_FULL(g_props[node_id].node.EngineId.ui32);
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
return HSA_GET_GFX_VERSION_FULL(topology_ctx->node_props[node_id].node.EngineId.ui32);
|
||||
}
|
||||
|
||||
uint16_t hsakmt_get_device_id_by_node_id(HSAuint32 node_id)
|
||||
uint16_t hsakmt_get_device_id_by_node_id(HsaKFDContext *ctx, HSAuint32 node_id)
|
||||
{
|
||||
if (!g_props || !g_system || g_system->NumNodes <= node_id)
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
if (!topology_ctx->node_props || !topology_ctx->system_props ||
|
||||
topology_ctx->system_props->NumNodes <= node_id)
|
||||
return 0;
|
||||
|
||||
return g_props[node_id].node.DeviceId;
|
||||
return topology_ctx->node_props[node_id].node.DeviceId;
|
||||
}
|
||||
|
||||
bool hsakmt_prefer_ats(HSAuint32 node_id)
|
||||
bool hsakmt_prefer_ats(HsaKFDContext *ctx, HSAuint32 node_id)
|
||||
{
|
||||
return g_props[node_id].node.Capability.ui32.HSAMMUPresent
|
||||
&& g_props[node_id].node.NumCPUCores
|
||||
&& g_props[node_id].node.NumFComputeCores;
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
return topology_ctx->node_props[node_id].node.Capability.ui32.HSAMMUPresent
|
||||
&& topology_ctx->node_props[node_id].node.NumCPUCores
|
||||
&& topology_ctx->node_props[node_id].node.NumFComputeCores;
|
||||
}
|
||||
|
||||
uint16_t hsakmt_get_device_id_by_gpu_id(HSAuint32 gpu_id)
|
||||
uint16_t hsakmt_get_device_id_by_gpu_id(HsaKFDContext *ctx, HSAuint32 gpu_id)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (!g_props || !g_system)
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
if (!topology_ctx->node_props || !topology_ctx->system_props)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < g_system->NumNodes; i++) {
|
||||
if (g_props[i].node.KFDGpuID == gpu_id)
|
||||
return g_props[i].node.DeviceId;
|
||||
for (i = 0; i < topology_ctx->system_props->NumNodes; i++) {
|
||||
if (topology_ctx->node_props[i].node.KFDGpuID == gpu_id)
|
||||
return topology_ctx->node_props[i].node.DeviceId;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t hsakmt_get_direct_link_cpu(uint32_t gpu_node)
|
||||
uint32_t hsakmt_get_direct_link_cpu(HsaKFDContext *ctx, HSAuint32 gpu_node)
|
||||
{
|
||||
HSAuint64 size = 0;
|
||||
int32_t cpu_id;
|
||||
HSAuint32 i;
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
|
||||
cpu_id = gpu_get_direct_link_cpu(gpu_node, g_props);
|
||||
cpu_id = gpu_get_direct_link_cpu(gpu_node, topology_ctx->node_props);
|
||||
if (cpu_id == -1)
|
||||
return INVALID_NODEID;
|
||||
|
||||
assert(g_props[cpu_id].mem);
|
||||
|
||||
for (i = 0; i < g_props[cpu_id].node.NumMemoryBanks; i++)
|
||||
size += g_props[cpu_id].mem[i].SizeInBytes;
|
||||
assert(topology_ctx->node_props[cpu_id].mem);
|
||||
for (i = 0; i < topology_ctx->node_props[cpu_id].node.NumMemoryBanks; i++)
|
||||
size += topology_ctx->node_props[cpu_id].mem[i].SizeInBytes;
|
||||
|
||||
return size ? (uint32_t)cpu_id : INVALID_NODEID;
|
||||
}
|
||||
|
||||
|
||||
HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
|
||||
HSAKMT_STATUS hsakmt_validate_nodeid_array(HsaKFDContext *ctx,
|
||||
uint32_t **gpu_id_array,
|
||||
uint32_t NumberOfNodes, uint32_t *NodeArray)
|
||||
{
|
||||
HSAKMT_STATUS ret;
|
||||
@@ -2543,7 +2613,7 @@ HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
|
||||
if (!(*gpu_id_array))
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
for (i = 0; i < NumberOfNodes; i++) {
|
||||
ret = hsakmt_validate_nodeid(NodeArray[i], *gpu_id_array + i);
|
||||
ret = hsakmt_validate_nodeid(ctx, NodeArray[i], *gpu_id_array + i);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
free(*gpu_id_array);
|
||||
break;
|
||||
@@ -2553,13 +2623,13 @@ HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline uint32_t hsakmt_get_num_sysfs_nodes(void)
|
||||
uint32_t hsakmt_get_num_sysfs_nodes(HsaKFDContext *ctx)
|
||||
{
|
||||
return num_sysfs_nodes;
|
||||
struct hsa_kfd_topology_context *topology_ctx = hsakmt_kfdcontext_get_topology_context(ctx);
|
||||
return topology_ctx->num_sysfs_nodes;
|
||||
}
|
||||
|
||||
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties)
|
||||
{
|
||||
return hsaKmtAcquireSystemPropertiesCtx(&hsakmt_primary_kfd_ctx, SystemProperties);
|
||||
|
||||
新增問題並參考
封鎖使用者