libhsakmt: Refactor for Multi-KFD Context Support (Multiple KFD FDs per Process) (#1701)

* Introduce HsaKFDContext structure and infrastructure for multiple KFD contexts, enabling
   independent contexts within a single process.
* Refactor core components (queue, event, FMM, topology) to be context-aware,
   using explicit HsaKFDContext parameters instead of global state.
* Replace global hsakmt_kfd_fd with context-specific file descriptors, ensuring full context isolation.
* Maintain backward compatibility by redirecting legacy APIs to use the primary context.

This refactoring establishes a foundation for multi-context support while preserving existing functionality.

Signed-off-by: Junhua Shen <Junhua.Shen@amd.com>
This commit is contained in:
Junhua Shen
2025-11-10 11:19:58 +08:00
committed by GitHub
parent 324a5519b9
commit 9da1572c42
21 changed files with 2377 additions and 683 deletions
@@ -130,7 +130,8 @@ set ( HSAKMT_SRC "src/debug.c"
"src/version.c"
"src/svm.c"
"src/pc_sampling.c"
"src/ais.c")
"src/ais.c"
"src/kfdcontext.c")
## Declare the library target name
add_library (${HSAKMT_TARGET} STATIC "")
+2 -2
View File
@@ -47,7 +47,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAisReadWriteFile(void *MemoryAddress,
/* Support is only for dGPUs */
if (!hsakmt_fmm_get_handle(MemoryAddress, &handle, &size_offset)) {
if (!hsakmt_fmm_get_handle(&hsakmt_primary_kfd_ctx, MemoryAddress, &handle, &size_offset)) {
pr_err("Address/size out of range: %p/%lu\n", MemoryAddress, MemorySizeInBytes);
return HSAKMT_STATUS_INVALID_PARAMETER;
}
@@ -66,7 +66,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAisReadWriteFile(void *MemoryAddress,
}
args.in.handle_offset = size_offset;
ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_AIS_OP, &args);
ret = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_AIS_OP, &args);
if (SizeCopiedInBytes)
*SizeCopiedInBytes = args.out.size_copied;
+14 -14
View File
@@ -78,7 +78,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgRegister(HSAuint32 NodeId)
args.gpu_id = gpu_id;
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_REGISTER_DEPRECATED, &args);
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_REGISTER_DEPRECATED, &args);
if (err == 0)
result = HSAKMT_STATUS_SUCCESS;
@@ -105,7 +105,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgUnregister(HSAuint32 NodeId)
struct kfd_ioctl_dbg_unregister_args args = {0};
args.gpu_id = gpu_id;
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED, &args);
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED, &args);
if (err)
return HSAKMT_STATUS_ERROR;
@@ -168,7 +168,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgWavefrontControl(HSAuint32 NodeId,
run_ptr += sizeof(DbgWaveMsgRing->MemoryVA);
/* send to kernel */
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED, args);
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED, args);
free(args);
@@ -256,7 +256,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgAddressWatch(HSAuint32 NodeId,
}
/* send to kernel */
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED, args);
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED, args);
free(args);
@@ -316,7 +316,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
((setupTtmp) ? KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK : 0);
args.r_debug = (HSAuint64)rDebug;
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RUNTIME_ENABLE, &args);
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RUNTIME_ENABLE, &args);
if (err) {
if (errno == EBUSY)
@@ -340,7 +340,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisable(void)
memset(&args, 0x00, sizeof(args));
args.mode_mask = 0; //Disable
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RUNTIME_ENABLE, &args))
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RUNTIME_ENABLE, &args))
return HSAKMT_STATUS_ERROR;
return HSAKMT_STATUS_SUCCESS;
@@ -363,7 +363,7 @@ static HSAKMT_STATUS dbg_trap_get_device_data(void *data,
args.device_snapshot.entry_size = entry_size;
args.op = KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT;
args.pid = getpid();
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
return HSAKMT_STATUS_ERROR;
*n_entries = args.device_snapshot.num_devices;
@@ -384,7 +384,7 @@ static HSAKMT_STATUS dbg_trap_get_queue_data(void *data,
args.queue_snapshot.snapshot_buf_ptr = (uint64_t) data;
args.pid = getpid();
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
return HSAKMT_STATUS_ERROR;
*n_entries = args.queue_snapshot.num_queues;
@@ -410,7 +410,7 @@ static HSAKMT_STATUS dbg_trap_suspend_queues(uint32_t *queue_ids,
args.op = KFD_IOC_DBG_TRAP_SUSPEND_QUEUES;
args.pid = getpid();
r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args);
r = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args);
if (r < 0)
return HSAKMT_STATUS_ERROR;
@@ -429,7 +429,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);
*data_size = sizeof(struct kfd_runtime_info);
args.enable.rinfo_size = *data_size;
args.enable.dbg_fd = hsakmt_kfd_fd;
args.enable.dbg_fd = hsakmt_primary_kfd_ctx.fd;
*runtime_info = malloc(args.enable.rinfo_size);
if (!*runtime_info)
return HSAKMT_STATUS_NO_MEMORY;
@@ -437,7 +437,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
args.op = KFD_IOC_DBG_TRAP_ENABLE;
args.pid = getpid();
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args)) {
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args)) {
free(*runtime_info);
return HSAKMT_STATUS_ERROR;
}
@@ -450,11 +450,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgDisable(void)
CHECK_KFD_OPEN();
CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);
args.enable.dbg_fd = hsakmt_kfd_fd;
args.enable.dbg_fd = hsakmt_primary_kfd_ctx.fd;
args.op = KFD_IOC_DBG_TRAP_DISABLE;
args.pid = getpid();
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
return HSAKMT_STATUS_ERROR;
return HSAKMT_STATUS_SUCCESS;
@@ -540,7 +540,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *arg
free(queue_ids);
}
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, args);
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, args);
if (DebugReturn)
*DebugReturn = err;
+166 -55
View File
@@ -34,12 +34,36 @@
#include "hsakmt/linux/kfd_ioctl.h"
#include "fmm.h"
#include "hsakmt/hsakmtmodel.h"
#include <assert.h>
static HSAuint64 *events_page = NULL;
void hsakmt_clear_events_page(void)
struct hsa_kfd_event_context
{
events_page = NULL;
HSAuint64 *events_page;
};
/*
 * Return the event state attached to @ctx, creating it on first use.
 *
 * The state is a zero-initialised struct hsa_kfd_event_context stored in
 * ctx->event_context; once present, the same pointer is handed back on
 * every later call.
 *
 * @ctx must not be NULL (checked with assert()).
 *
 * Returns the event state, or NULL if the allocation failed; in that case an
 * error is logged and ctx->event_context stays NULL.
 */
struct hsa_kfd_event_context *hsakmt_kfdcontext_get_event_context(HsaKFDContext *ctx)
{
	assert(ctx);

	if (!ctx->event_context) {
		ctx->event_context = calloc(1, sizeof(struct hsa_kfd_event_context));
		/* A failed calloc leaves the field NULL, which is what gets returned below. */
		if (!ctx->event_context)
			pr_err("Alloc memory failed for struct hsa_kfd_event_context size %zu\n",
			       sizeof(struct hsa_kfd_event_context));
	}

	return ctx->event_context;
}
/*
 * Forget the cached events page pointer of @ctx.
 *
 * Only the cached pointer is reset to NULL; nothing is unmapped or freed
 * here.
 *
 * The event state is read straight from ctx->event_context instead of going
 * through hsakmt_kfdcontext_get_event_context(): that getter allocates the
 * state on first use, so using it here would allocate memory (and could log
 * an allocation failure) merely to reset a pointer that does not exist yet.
 * A context without event state has nothing to clear.
 *
 * @ctx must not be NULL (checked with assert()).
 */
void hsakmt_clear_events_page(HsaKFDContext *ctx)
{
	struct hsa_kfd_event_context *event_ctx;

	assert(ctx);

	event_ctx = ctx->event_context;
	if (event_ctx)
		event_ctx->events_page = NULL;
}
static bool IsSystemEventType(HSA_EVENTTYPE type)
@@ -48,14 +72,18 @@ static bool IsSystemEventType(HSA_EVENTTYPE type)
return (type != HSA_EVENTTYPE_SIGNAL && type != HSA_EVENTTYPE_DEBUG_EVENT);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
bool ManualReset, bool IsSignaled,
HsaEvent **Event)
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEventCtx(HsaKFDContext *ctx,
HsaEventDescriptor *EventDesc,
bool ManualReset, bool IsSignaled,
HsaEvent **Event)
{
unsigned int event_limit = KFD_SIGNAL_EVENT_LIMIT;
CHECK_KFD_OPEN();
struct hsa_kfd_event_context *event_ctx = NULL;
HSAuint64 *events_page = NULL;
if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID)
return HSAKMT_STATUS_INVALID_PARAMETER;
@@ -74,9 +102,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
/* dGPU code */
pthread_mutex_lock(&hsakmt_mutex);
event_ctx = hsakmt_kfdcontext_get_event_context(ctx);
events_page = event_ctx->events_page;
if (hsakmt_is_dgpu && !events_page) {
events_page = hsakmt_allocate_exec_aligned_memory_gpu(
events_page = hsakmt_allocate_exec_aligned_memory_gpu(ctx,
KFD_SIGNAL_EVENT_LIMIT * 8, PAGE_SIZE, 0, 0, true, false, true);
if (!events_page) {
free(e);
@@ -86,10 +116,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
if (hsakmt_use_model)
model_set_event_page(events_page, KFD_SIGNAL_EVENT_LIMIT);
else
hsakmt_fmm_get_handle(events_page, (uint64_t *)&args.event_page_offset, NULL);
hsakmt_fmm_get_handle(ctx, events_page, (uint64_t *)&args.event_page_offset, NULL);
}
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) {
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) {
free(e);
*Event = NULL;
pthread_mutex_unlock(&hsakmt_mutex);
@@ -100,17 +130,17 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
if (!events_page && args.event_page_offset > 0) {
events_page = mmap(NULL, event_limit * 8, PROT_WRITE | PROT_READ,
MAP_SHARED, hsakmt_kfd_fd, args.event_page_offset);
MAP_SHARED, ctx->fd, args.event_page_offset);
if (events_page == MAP_FAILED) {
/* old kernels only support 256 events */
event_limit = 256;
events_page = mmap(NULL, PAGE_SIZE, PROT_WRITE | PROT_READ,
MAP_SHARED, hsakmt_kfd_fd, args.event_page_offset);
MAP_SHARED, ctx->fd, args.event_page_offset);
}
if (events_page == MAP_FAILED) {
events_page = NULL;
pthread_mutex_unlock(&hsakmt_mutex);
hsaKmtDestroyEvent(e);
hsaKmtDestroyEventCtx(ctx, e);
return HSAKMT_STATUS_ERROR;
}
}
@@ -118,10 +148,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
if (args.event_page_offset > 0 && args.event_slot_index < event_limit)
e->EventData.HWData2 = (HSAuint64)&events_page[args.event_slot_index];
pthread_mutex_unlock(&hsakmt_mutex);
pthread_mutex_unlock(&hsakmt_mutex);
e->EventData.EventType = EventDesc->EventType;
e->EventData.HWData1 = args.event_id;
e->EventData.EventType = EventDesc->EventType;
e->EventData.HWData1 = args.event_id;
e->EventData.HWData3 = args.event_trigger_data;
e->EventData.EventData.SyncVar.SyncVar.UserData =
@@ -134,19 +164,21 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
set_args.event_id = args.event_id;
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_EVENT,
&set_args) != 0) {
hsaKmtDestroyEvent(e);
return HSAKMT_STATUS_ERROR;
}
}
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_EVENT, &set_args) != 0) {
hsaKmtDestroyEventCtx(ctx, e);
return HSAKMT_STATUS_ERROR;
}
}
*Event = e;
*Event = e;
if (!event_ctx->events_page)
event_ctx->events_page = events_page;
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(HsaEvent *Event)
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEventCtx(HsaKFDContext *ctx,
HsaEvent *Event)
{
CHECK_KFD_OPEN();
@@ -157,14 +189,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(HsaEvent *Event)
args.event_id = Event->EventId;
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DESTROY_EVENT, &args) != 0)
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DESTROY_EVENT, &args) != 0)
return HSAKMT_STATUS_ERROR;
free(Event);
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(HsaEvent *Event)
HSAKMT_STATUS HSAKMTAPI hsaKmtSetEventCtx(HsaKFDContext *ctx,
HsaEvent *Event)
{
CHECK_KFD_OPEN();
@@ -181,13 +214,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(HsaEvent *Event)
args.event_id = Event->EventId;
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_EVENT, &args) == -1)
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_EVENT, &args) == -1)
return HSAKMT_STATUS_ERROR;
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(HsaEvent *Event)
HSAKMT_STATUS HSAKMTAPI hsaKmtResetEventCtx(HsaKFDContext *ctx,
HsaEvent *Event)
{
CHECK_KFD_OPEN();
@@ -204,13 +238,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(HsaEvent *Event)
args.event_id = Event->EventId;
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RESET_EVENT, &args) == -1)
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_RESET_EVENT, &args) == -1)
return HSAKMT_STATUS_ERROR;
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event)
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventStateCtx(HsaKFDContext *ctx,
HsaEvent *Event)
{
CHECK_KFD_OPEN();
@@ -220,22 +255,25 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event)
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent(HsaEvent *Event,
HSAuint32 Milliseconds)
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEventCtx(HsaKFDContext *ctx,
HsaEvent *Event,
HSAuint32 Milliseconds)
{
return hsaKmtWaitOnEvent_Ext(Event, Milliseconds, NULL);
return hsaKmtWaitOnEvent_ExtCtx(ctx, Event, Milliseconds, NULL);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent_Ext(HsaEvent *Event,
HSAuint32 Milliseconds, uint64_t *event_age)
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent_ExtCtx(HsaKFDContext *ctx,
HsaEvent *Event,
HSAuint32 Milliseconds, uint64_t *event_age)
{
if (!Event)
return HSAKMT_STATUS_INVALID_HANDLE;
return hsaKmtWaitOnMultipleEvents_Ext(&Event, 1, true, Milliseconds, event_age);
return hsaKmtWaitOnMultipleEvents_ExtCtx(ctx, &Event,
1, true, Milliseconds, event_age);
}
static HSAKMT_STATUS get_mem_info_svm_api(uint64_t address, uint32_t gpu_id)
static HSAKMT_STATUS get_mem_info_svm_api(HsaKFDContext *ctx, uint64_t address, uint32_t gpu_id)
{
struct kfd_ioctl_svm_args *args;
uint32_t node_id = 0;
@@ -258,7 +296,7 @@ static HSAKMT_STATUS get_mem_info_svm_api(uint64_t address, uint32_t gpu_id)
args->op = KFD_IOCTL_SVM_OP_GET_ATTR;
args->nattr = s_attr / sizeof(*attrs);
memcpy(args->attrs, attrs, s_attr);
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args)) {
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args)) {
pr_debug("op get range attrs failed %s\n", strerror(errno));
return HSAKMT_STATUS_ERROR;
}
@@ -312,8 +350,8 @@ static HSAKMT_STATUS get_mem_info_svm_api(uint64_t address, uint32_t gpu_id)
return HSAKMT_STATUS_SUCCESS;
}
//Analysis memory exception data, print debug messages
static void analysis_memory_exception(struct kfd_hsa_memory_exception_data *
memory_exception_data)
static void analysis_memory_exception(HsaKFDContext *ctx,
struct kfd_hsa_memory_exception_data *memory_exception_data)
{
HSAKMT_STATUS ret;
HsaPointerInfo info;
@@ -331,9 +369,9 @@ static void analysis_memory_exception(struct kfd_hsa_memory_exception_data *
else if (memory_exception_data->failure.NoExecute)
pr_err("Execute to none-executable page\n");
ret = hsakmt_fmm_get_mem_info((const void *)addr, &info);
ret = hsakmt_fmm_get_mem_info(ctx, (const void *)addr, &info);
if (ret != HSAKMT_STATUS_SUCCESS) {
ret = get_mem_info_svm_api(addr, memory_exception_data->gpu_id);
ret = get_mem_info_svm_api(ctx, addr, memory_exception_data->gpu_id);
if (ret != HSAKMT_STATUS_SUCCESS)
pr_err("Address does not belong to a known buffer\n");
return;
@@ -378,19 +416,22 @@ static void analysis_memory_exception(struct kfd_hsa_memory_exception_data *
}
}
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(HsaEvent *Events[],
HSAuint32 NumEvents,
bool WaitOnAll,
HSAuint32 Milliseconds)
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEventsCtx(HsaKFDContext *ctx,
HsaEvent *Events[],
HSAuint32 NumEvents,
bool WaitOnAll,
HSAuint32 Milliseconds)
{
return hsaKmtWaitOnMultipleEvents_Ext(Events, NumEvents, WaitOnAll, Milliseconds, NULL);
return hsaKmtWaitOnMultipleEvents_ExtCtx(ctx, Events,
NumEvents, WaitOnAll, Milliseconds, NULL);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
HSAuint32 NumEvents,
bool WaitOnAll,
HSAuint32 Milliseconds,
uint64_t *event_age)
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_ExtCtx(HsaKFDContext *ctx,
HsaEvent *Events[],
HSAuint32 NumEvents,
bool WaitOnAll,
HSAuint32 Milliseconds,
uint64_t *event_age)
{
HSAKMT_STATUS result;
CHECK_KFD_OPEN();
@@ -417,7 +458,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
args.num_events = NumEvents;
args.events_ptr = (uint64_t)(uintptr_t)event_data;
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1)
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1)
result = HSAKMT_STATUS_ERROR;
else if (args.wait_result == KFD_IOC_WAIT_RESULT_TIMEOUT)
result = HSAKMT_STATUS_WAIT_TIMEOUT;
@@ -438,7 +479,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
Events[i]->EventData.EventData.MemoryAccessFault.Failure.ECC =
((event_data[i].memory_exception_data.ErrorType == 1) || (event_data[i].memory_exception_data.ErrorType == 2)) ? 1 : 0;
Events[i]->EventData.EventData.MemoryAccessFault.Flags = HSA_EVENTID_MEMORY_FATAL_PROCESS;
analysis_memory_exception(&event_data[i].memory_exception_data);
analysis_memory_exception(ctx, &event_data[i].memory_exception_data);
} else if (Events[i]->EventData.EventType == HSA_EVENTTYPE_HW_EXCEPTION &&
event_data[i].hw_exception_data.gpu_id) {
@@ -464,7 +505,7 @@ out:
return result;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMICtx(HsaKFDContext *ctx, HSAuint32 NodeId, int *fd)
{
struct kfd_ioctl_smi_events_args args;
HSAKMT_STATUS result;
@@ -481,7 +522,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
}
args.gpuid = gpuid;
result = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SMI_EVENTS, &args);
result = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SMI_EVENTS, &args);
if (result) {
pr_debug("open SMI event fd failed %s\n", strerror(errno));
return HSAKMT_STATUS_ERROR;
@@ -490,3 +531,73 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
*fd = args.anon_fd;
return HSAKMT_STATUS_SUCCESS;
}
/*
 * Legacy, context-free variant of hsaKmtCreateEventCtx(): forwards all
 * arguments together with the primary KFD context (hsakmt_primary_kfd_ctx)
 * and returns the callee's status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
					  bool ManualReset, bool IsSignaled,
					  HsaEvent **Event)
{
	HSAKMT_STATUS status;

	status = hsaKmtCreateEventCtx(&hsakmt_primary_kfd_ctx, EventDesc,
				      ManualReset, IsSignaled, Event);
	return status;
}
/*
 * Legacy, context-free variant of hsaKmtDestroyEventCtx(): forwards @Event
 * together with the primary KFD context and returns the callee's status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(HsaEvent *Event)
{
	HSAKMT_STATUS status = hsaKmtDestroyEventCtx(&hsakmt_primary_kfd_ctx, Event);

	return status;
}
/*
 * Legacy, context-free variant of hsaKmtSetEventCtx(): forwards @Event
 * together with the primary KFD context and returns the callee's status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(HsaEvent *Event)
{
	HSAKMT_STATUS status = hsaKmtSetEventCtx(&hsakmt_primary_kfd_ctx, Event);

	return status;
}
/*
 * Legacy, context-free variant of hsaKmtResetEventCtx(): forwards @Event
 * together with the primary KFD context and returns the callee's status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(HsaEvent *Event)
{
	HSAKMT_STATUS status = hsaKmtResetEventCtx(&hsakmt_primary_kfd_ctx, Event);

	return status;
}
/*
 * Legacy, context-free variant of hsaKmtQueryEventStateCtx(): forwards
 * @Event together with the primary KFD context and returns the callee's
 * status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event)
{
	HSAKMT_STATUS status = hsaKmtQueryEventStateCtx(&hsakmt_primary_kfd_ctx, Event);

	return status;
}
/*
 * Wait on a single event without event-age tracking.
 *
 * Equivalent to calling hsaKmtWaitOnEvent_Ext() with a NULL event_age; the
 * callee's status is returned unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent(HsaEvent *Event,
					  HSAuint32 Milliseconds)
{
	uint64_t *no_event_age = NULL;

	return hsaKmtWaitOnEvent_Ext(Event, Milliseconds, no_event_age);
}
/*
 * Wait on a single event, optionally tracking its age.
 *
 * A NULL @Event is rejected with HSAKMT_STATUS_INVALID_HANDLE. Otherwise the
 * wait is expressed as a one-element, wait-on-all call to
 * hsaKmtWaitOnMultipleEvents_Ext() and that call's status is returned.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent_Ext(HsaEvent *Event,
					      HSAuint32 Milliseconds, uint64_t *event_age)
{
	HSAKMT_STATUS status;

	if (Event)
		status = hsaKmtWaitOnMultipleEvents_Ext(&Event, 1, true,
							Milliseconds, event_age);
	else
		status = HSAKMT_STATUS_INVALID_HANDLE;

	return status;
}
/*
 * Wait on several events without event-age tracking.
 *
 * Equivalent to calling hsaKmtWaitOnMultipleEvents_Ext() with a NULL
 * event_age; the callee's status is returned unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(HsaEvent *Events[],
						   HSAuint32 NumEvents,
						   bool WaitOnAll,
						   HSAuint32 Milliseconds)
{
	uint64_t *no_event_age = NULL;

	return hsaKmtWaitOnMultipleEvents_Ext(Events, NumEvents, WaitOnAll,
					      Milliseconds, no_event_age);
}
/*
 * Legacy, context-free variant of hsaKmtWaitOnMultipleEvents_ExtCtx():
 * forwards all arguments together with the primary KFD context
 * (hsakmt_primary_kfd_ctx) and returns the callee's status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
						       HSAuint32 NumEvents,
						       bool WaitOnAll,
						       HSAuint32 Milliseconds,
						       uint64_t *event_age)
{
	HSAKMT_STATUS status;

	status = hsaKmtWaitOnMultipleEvents_ExtCtx(&hsakmt_primary_kfd_ctx,
						   Events, NumEvents, WaitOnAll,
						   Milliseconds, event_age);
	return status;
}
/*
 * Legacy, context-free variant of hsaKmtOpenSMICtx(): forwards @NodeId and
 * @fd together with the primary KFD context and returns the callee's status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
{
	HSAKMT_STATUS status = hsaKmtOpenSMICtx(&hsakmt_primary_kfd_ctx, NodeId, fd);

	return status;
}
File diff suppressed because it is too large Load Diff
+99 -45
View File
@@ -45,59 +45,113 @@ typedef struct {
void *start_address;
} aperture_properties_t;
HSAKMT_STATUS hsakmt_fmm_get_amdgpu_device_handle(uint32_t node_id, HsaAMDGPUDeviceHandle *DeviceHandle);
HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes);
void hsakmt_fmm_destroy_process_apertures(void);
HSAKMT_STATUS hsakmt_fmm_get_amdgpu_device_handle(HsaKFDContext *ctx,
uint32_t node_id, HsaAMDGPUDeviceHandle *DeviceHandle);
HSAKMT_STATUS hsakmt_fmm_init_process_apertures(HsaKFDContext *ctx, unsigned int NumNodes);
void hsakmt_fmm_destroy_process_apertures(HsaKFDContext *ctx);
/* Memory interface */
void *hsakmt_fmm_allocate_scratch(uint32_t gpu_id, void *address, uint64_t MemorySizeInBytes);
void *hsakmt_fmm_allocate_device(uint32_t gpu_id, uint32_t node_id, void *address,
uint64_t MemorySizeInBytes, uint64_t alignment, HsaMemFlags flags);
void *hsakmt_fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes, uint64_t doorbell_offset);
void *hsakmt_fmm_allocate_host(uint32_t gpu_id, uint32_t node_id, void *address, uint64_t MemorySizeInBytes,
uint64_t alignment, HsaMemFlags flags);
void hsakmt_fmm_print(uint32_t node);
HSAKMT_STATUS hsakmt_fmm_release(void *address);
HSAKMT_STATUS hsakmt_fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address);
int hsakmt_fmm_unmap_from_gpu(void *address);
bool hsakmt_fmm_get_handle(void *address, uint64_t *handle, uint64_t *size_offset);
HSAKMT_STATUS hsakmt_fmm_get_mem_info(const void *address, HsaPointerInfo *info);
HSAKMT_STATUS hsakmt_fmm_set_mem_user_data(const void *mem, void *usr_data);
// Memory allocation/free functions
void *hsakmt_fmm_allocate_scratch(HsaKFDContext *ctx,
uint32_t gpu_id,
void *address,
uint64_t MemorySizeInBytes);
void *hsakmt_fmm_allocate_device(HsaKFDContext *ctx,
uint32_t gpu_id,
uint32_t node_id,
void *address,
uint64_t MemorySizeInBytes,
uint64_t alignment,
HsaMemFlags flags);
void *hsakmt_fmm_allocate_host(HsaKFDContext *ctx,
uint32_t gpu_id,
uint32_t node_id,
void *address,
uint64_t MemorySizeInBytes,
uint64_t alignment,
HsaMemFlags flags);
void *hsakmt_fmm_allocate_doorbell(HsaKFDContext *ctx,
uint32_t gpu_id,
uint64_t MemorySizeInBytes,
uint64_t doorbell_offset);
void hsakmt_fmm_print(HsaKFDContext *ctx, uint32_t node);
HSAKMT_STATUS hsakmt_fmm_release(HsaKFDContext *ctx, void *address);
// Memory mmap/munmap functions
HSAKMT_STATUS hsakmt_fmm_map_to_gpu(HsaKFDContext *ctx,
void *address,
uint64_t size,
uint64_t *gpuvm_address);
HSAKMT_STATUS hsakmt_fmm_map_to_gpu_nodes(HsaKFDContext *ctx,
void *address,
uint64_t size,
uint32_t *nodes_to_map,
uint64_t num_of_nodes,
uint64_t *gpuvm_address);
int hsakmt_fmm_unmap_from_gpu(HsaKFDContext *ctx, void *address);
// Memory register/deregister functions
HSAKMT_STATUS hsakmt_fmm_register_memory(HsaKFDContext *ctx,
void *address, uint64_t size_in_bytes,
uint32_t *gpu_id_array,
uint32_t gpu_id_array_size,
HsaMemFlags flags);
HSAKMT_STATUS hsakmt_fmm_register_graphics_handle(HsaKFDContext *ctx,
HSAuint64 GraphicsResourceHandle,
HsaGraphicsResourceInfo *GraphicsResourceInfo,
uint32_t *gpu_id_array,
uint32_t gpu_id_array_size,
HSA_REGISTER_MEM_FLAGS RegisterFlags);
HSAKMT_STATUS hsakmt_fmm_deregister_memory(HsaKFDContext *ctx, void *address);
// Memory export functions
HSAKMT_STATUS hsakmt_fmm_export_dma_buf_fd(HsaKFDContext *ctx,
void *MemoryAddress,
HSAuint64 MemorySizeInBytes,
int *DMABufFd,
HSAuint64 *Offset);
HSAKMT_STATUS hsakmt_fmm_share_memory(HsaKFDContext *ctx,
void *MemoryAddress,
HSAuint64 SizeInBytes,
HsaSharedMemoryHandle *SharedMemoryHandle);
HSAKMT_STATUS hsakmt_fmm_register_shared_memory(HsaKFDContext *ctx,
const HsaSharedMemoryHandle *SharedMemoryHandle,
HSAuint64 *SizeInBytes,
void **MemoryAddress,
uint32_t *gpu_id_array,
uint32_t gpu_id_array_size);
bool hsakmt_fmm_get_handle(HsaKFDContext *ctx,
void *address,
uint64_t *handle,
uint64_t *size_offset);
HSAKMT_STATUS hsakmt_fmm_get_mem_info(HsaKFDContext *ctx,
const void *address,
HsaPointerInfo *info);
HSAKMT_STATUS hsakmt_fmm_set_mem_user_data(HsaKFDContext *ctx,
const void *mem,
void *usr_data);
#ifdef SANITIZER_AMDGPU
HSAKMT_STATUS hsakmt_fmm_replace_asan_header_page(void* address);
HSAKMT_STATUS hsakmt_fmm_return_asan_header_page(void* address);
HSAKMT_STATUS hsakmt_fmm_replace_asan_header_page(HsaKFDContext *ctx, void* address);
HSAKMT_STATUS hsakmt_fmm_return_asan_header_page(HsaKFDContext *ctx, void* address);
#endif
/* Topology interface*/
HSAKMT_STATUS hsakmt_fmm_get_aperture_base_and_limit(aperture_type_e aperture_type, HSAuint32 gpu_id,
HSAKMT_STATUS hsakmt_fmm_get_aperture_base_and_limit(HsaKFDContext *ctx,
aperture_type_e aperture_type, HSAuint32 gpu_id,
HSAuint64 *aperture_base, HSAuint64 *aperture_limit);
HSAKMT_STATUS hsakmt_fmm_register_memory(void *address, uint64_t size_in_bytes,
uint32_t *gpu_id_array,
uint32_t gpu_id_array_size,
HsaMemFlags flags);
HSAKMT_STATUS hsakmt_fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle,
HsaGraphicsResourceInfo *GraphicsResourceInfo,
uint32_t *gpu_id_array,
uint32_t gpu_id_array_size,
HSA_REGISTER_MEM_FLAGS RegisterFlags);
HSAKMT_STATUS hsakmt_fmm_deregister_memory(void *address);
HSAKMT_STATUS hsakmt_fmm_export_dma_buf_fd(void *MemoryAddress,
HSAuint64 MemorySizeInBytes,
int *DMABufFd,
HSAuint64 *Offset);
HSAKMT_STATUS hsakmt_fmm_share_memory(void *MemoryAddress,
HSAuint64 SizeInBytes,
HsaSharedMemoryHandle *SharedMemoryHandle);
HSAKMT_STATUS hsakmt_fmm_register_shared_memory(const HsaSharedMemoryHandle *SharedMemoryHandle,
HSAuint64 *SizeInBytes,
void **MemoryAddress,
uint32_t *gpu_id_array,
uint32_t gpu_id_array_size);
HSAKMT_STATUS hsakmt_fmm_map_to_gpu_nodes(void *address, uint64_t size,
uint32_t *nodes_to_map, uint64_t num_of_nodes, uint64_t *gpuvm_address);
int hsakmt_open_drm_render_device(int minor);
int hsakmt_open_drm_render_device(HsaKFDContext *ctx, int minor);
void *hsakmt_mmap_allocate_aligned(int prot, int flags, uint64_t size, uint64_t align,
uint64_t guard_size, void *aper_base, void *aper_limit, int fd);
@@ -27,10 +27,8 @@
// HSAKMT global data
int hsakmt_kfd_fd = -1;
int hsakmt_udmabuf_dev_fd = -1;
unsigned long hsakmt_kfd_open_count;
unsigned long hsakmt_system_properties_count;
pthread_mutex_t hsakmt_mutex = PTHREAD_MUTEX_INITIALIZER;
bool hsakmt_is_dgpu;
@@ -0,0 +1,827 @@
/*
* Copyright © 2025 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _HSAKMTCTX_H_
#define _HSAKMTCTX_H_
#include "hsakmt/hsakmttypes.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _HsaKFDContext HsaKFDContext;
/**
The context-aware version for opening the kfd device.
"Opens" the HSA kernel driver for user-kernel mode communication.
On Windows, this function gets a handle to the KFD's AMDKFDIO device object that
is responsible for user-kernel communication, this handle is used internally by
the thunk library to send device I/O control to the HSA kernel driver.
No other thunk library function may be called unless the user-kernel communication
channel is opened first.
On Linux this call opens the "/dev/kfd" device file to establish a communication
path to the kernel.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtOpenKFDCtx(
HsaKFDContext **pCtx //IN/OUT
);
/**
The context-aware version for closing the kfd device.
"Closes" the user-kernel communication path.
On Windows, the handle obtained by the hsaKmtOpenKFDCtx() function is closed;
no other communication with the kernel driver is possible after the successful
execution of the hsaKmtCloseKFDCtx() function. Depending on the failure reason,
the user-kernel communication path may or may not be still active.
On Linux the function closes the "/dev/kfd" device file.
No further communication to the kernel driver is allowed until hsaKmtOpenKFDCtx()
function is called again.
NOTE(review): this prototype takes no HsaKFDContext argument, whereas the other
*Ctx declarations visible in this header all take one, so the caller cannot
name which context to close -- confirm this signature is intended.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtCloseKFDCtx( void );
/**
The function takes a "snapshot" of the topology information within the KFD
to avoid any changes during the enumeration process.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtAcquireSystemPropertiesCtx(
HsaKFDContext *ctx, //IN
HsaSystemProperties* SystemProperties //OUT
);
/**
Releases the topology "snapshot" taken by hsaKmtAcquireSystemPropertiesCtx()
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtReleaseSystemPropertiesCtx(
HsaKFDContext *ctx //IN
);
/**
Retrieves the discoverable sub-properties for a given HSA
node. The parameters returned allow the application or runtime to size the
management structures necessary to store the information.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetNodePropertiesCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HsaNodeProperties* NodeProperties //OUT
);
/**
Retrieves the memory properties of a specific HSA node.
The memory pointer passed as MemoryProperties is sized as
NumBanks * sizeof(HsaMemoryProperties). NumBanks is retrieved with the
hsaKmtGetNodePropertiesCtx() call.
Some of the data returned is optional. Not all implementations may return all
parameters in the HsaMemoryProperties.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetNodeMemoryPropertiesCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HSAuint32 NumBanks, //IN
HsaMemoryProperties* MemoryProperties //OUT
);
/**
Retrieves the cache properties of a specific HSA node and processor ID.
ProcessorID refers to either a CPU core or a SIMD unit as enumerated earlier
via the hsaKmtGetNodePropertiesCtx() call.
The memory pointer passed as CacheProperties is sized as
NumCaches * sizeof(HsaCacheProperties). NumCaches is retrieved with the
hsaKmtGetNodePropertiesCtx() call.
The data returned is optional. Not all implementations may return all
parameters in the CacheProperties.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetNodeCachePropertiesCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HSAuint32 ProcessorId, //IN
HSAuint32 NumCaches, //IN
HsaCacheProperties* CacheProperties //OUT
);
/**
Retrieves the HSA IO affinity properties of a specific HSA node.
The memory pointer passed as IoLinkProperties is sized as
NumIoLinks * sizeof(HsaIoLinkProperties). NumIoLinks is retrieved with the
hsaKmtGetNodePropertiesCtx() call.
The data returned is optional. Not all implementations may return all
parameters in the IoLinkProperties.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetNodeIoLinkPropertiesCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HSAuint32 NumIoLinks, //IN
HsaIoLinkProperties* IoLinkProperties //OUT
);
/**
Creates an operating system event associated with a HSA event ID
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtCreateEventCtx(
HsaKFDContext *ctx, //IN
HsaEventDescriptor* EventDesc, //IN
bool ManualReset, //IN
bool IsSignaled, //IN
HsaEvent** Event //OUT
);
/**
Destroys an operating system event associated with a HSA event ID
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDestroyEventCtx(
HsaKFDContext *ctx, //IN
HsaEvent* Event //IN
);
/**
Sets the specified event object to the signaled state
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetEventCtx(
HsaKFDContext *ctx, //IN
HsaEvent* Event //IN
);
/**
Sets the specified event object to the non-signaled state
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtResetEventCtx(
HsaKFDContext *ctx, //IN
HsaEvent* Event //IN
);
/**
Queries the state of the specified event object
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtQueryEventStateCtx(
HsaKFDContext *ctx, //IN
HsaEvent* Event //IN
);
/**
Checks the current state of the event object. If the object's state is
nonsignaled, the calling thread enters the wait state.
The function returns when one of the following occurs:
- The specified event object is in the signaled state.
- The time-out interval elapses.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtWaitOnEventCtx(
HsaKFDContext *ctx, //IN
HsaEvent* Event, //IN
HSAuint32 Milliseconds //IN
);
/**
Checks the current state of the event object. If the object's state is
nonsignaled, the calling thread enters the wait state. event_age can
help avoiding race conditions.
The function returns when one of the following occurs:
- The specified event object is in the signaled state.
- The time-out interval elapses.
- Tracking event age
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtWaitOnEvent_ExtCtx(
HsaKFDContext *ctx, //IN
HsaEvent* Event, //IN
HSAuint32 Milliseconds, //IN
uint64_t *event_age //IN/OUT
);
/**
Checks the current state of multiple event objects.
The function returns when one of the following occurs:
- Either any one or all of the specified objects are in the signaled state
- if "WaitOnAll" is "true" the function returns when the state of all
objects in array is signaled
- if "WaitOnAll" is "false" the function returns when the state of any
one of the objects is set to signaled
- The time-out interval elapses.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtWaitOnMultipleEventsCtx(
HsaKFDContext *ctx, //IN
HsaEvent* Events[], //IN
HSAuint32 NumEvents, //IN
bool WaitOnAll, //IN
HSAuint32 Milliseconds //IN
);
/**
Checks the current state of multiple event objects.
event_age can help avoiding race conditions.
The function returns when one of the following occurs:
- Either any one or all of the specified objects are in the signaled state
- if "WaitOnAll" is "true" the function returns when the state of all
objects in array is signaled
- if "WaitOnAll" is "false" the function returns when the state of any
one of the objects is set to signaled
- The time-out interval elapses.
- Tracking event age
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtWaitOnMultipleEvents_ExtCtx(
HsaKFDContext *ctx, //IN
HsaEvent* Events[], //IN
HSAuint32 NumEvents, //IN
bool WaitOnAll, //IN
HSAuint32 Milliseconds, //IN
uint64_t *event_age //IN/OUT
);
/**
Creates a GPU queue with user-mode access rights
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtCreateQueueCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HSA_QUEUE_TYPE Type, //IN
HSAuint32 QueuePercentage, //IN
HSA_QUEUE_PRIORITY Priority, //IN
void* QueueAddress, //IN
HSAuint64 QueueSizeInBytes, //IN
HsaEvent* Event, //IN
HsaQueueResource* QueueResource //OUT
);
/**
Creates a GPU queue with user-mode access rights
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtCreateQueueExtCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HSA_QUEUE_TYPE Type, //IN
HSAuint32 QueuePercentage, //IN
HSA_QUEUE_PRIORITY Priority, //IN
HSAuint32 SdmaEngineId, //IN
void* QueueAddress, //IN
HSAuint64 QueueSizeInBytes, //IN
HsaEvent* Event, //IN
HsaQueueResource* QueueResource //OUT
);
/**
Updates a queue
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtUpdateQueueCtx(
HsaKFDContext *ctx, //IN
HSA_QUEUEID QueueId, //IN
HSAuint32 QueuePercentage, //IN
HSA_QUEUE_PRIORITY Priority, //IN
void* QueueAddress, //IN
HSAuint64 QueueSize, //IN
HsaEvent* Event //IN
);
/**
Destroys a queue
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDestroyQueueCtx(
HsaKFDContext *ctx, //IN
HSA_QUEUEID QueueId //IN
);
/**
Set cu mask for a queue
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetQueueCUMaskCtx(
HsaKFDContext *ctx, //IN
HSA_QUEUEID QueueId, //IN
HSAuint32 CUMaskCount, //IN
HSAuint32* QueueCUMask //IN
);
/**
Retrieves information about a queue.
NOTE(review): QueueInfo is a non-const pointer on a Get* call, so it is presumably
the buffer the call fills in and the earlier //IN tag a copy/paste slip - confirm
against the implementation.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetQueueInfoCtx(
HsaKFDContext *ctx, //IN
HSA_QUEUEID QueueId, //IN
HsaQueueInfo *QueueInfo //OUT (presumed; see note above)
);
/**
Allows an HSA process to set/change the default and alternate memory coherency, before starting to dispatch.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetMemoryPolicyCtx(
HsaKFDContext *ctx, //IN
HSAuint32 Node, //IN
HSAuint32 DefaultPolicy, //IN
HSAuint32 AlternatePolicy, //IN
void* MemoryAddressAlternate, //IN (page-aligned)
HSAuint64 MemorySizeInBytes //IN (page-aligned)
);
/**
Allocates a memory buffer that may be accessed by the GPU
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtAllocMemoryCtx(
HsaKFDContext *ctx, //IN
HSAuint32 PreferredNode, //IN
HSAuint64 SizeInBytes, //IN (multiple of page size)
HsaMemFlags MemFlags, //IN
void** MemoryAddress //IN/OUT (page-aligned)
);
/**
Allocates a memory buffer with specific alignment that may be accessed by the GPU
If Alignment is 0, the smallest possible alignment will be used
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtAllocMemoryAlignCtx(
HsaKFDContext *ctx, //IN
HSAuint32 PreferredNode, //IN
HSAuint64 SizeInBytes, //IN (multiple of page size)
HSAuint64 Alignment, //IN (power of 2 and >= page size)
HsaMemFlags MemFlags, //IN
void** MemoryAddress //IN/OUT (page-aligned)
);
/**
Frees a memory buffer
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtFreeMemoryCtx(
HsaKFDContext *ctx, //IN
void* MemoryAddress, //IN (page-aligned)
HSAuint64 SizeInBytes //IN
);
/**
Inquires memory available for allocation as a memory buffer
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtAvailableMemoryCtx(
HsaKFDContext *ctx, //IN
HSAuint32 Node, //IN
HSAuint64 *AvailableBytes //OUT
);
/**
Registers with KFD a memory buffer that may be accessed by the GPU
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterMemoryCtx(
HsaKFDContext *ctx, //IN
void* MemoryAddress, //IN (cache-aligned)
HSAuint64 MemorySizeInBytes //IN (cache-aligned)
);
/**
Registers with KFD a memory buffer that may be accessed by specific GPUs
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterMemoryToNodesCtx(
HsaKFDContext *ctx, //IN
void *MemoryAddress, //IN (cache-aligned)
HSAuint64 MemorySizeInBytes, //IN (cache-aligned)
HSAuint64 NumberOfNodes, //IN
HSAuint32* NodeArray //IN
);
/**
Registers with KFD a memory buffer with memory attributes
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterMemoryWithFlagsCtx(
HsaKFDContext *ctx, //IN
void *MemoryAddress, //IN (cache-aligned)
HSAuint64 MemorySizeInBytes, //IN (cache-aligned)
HsaMemFlags MemFlags //IN
);
/**
Registers with KFD a graphics buffer and returns graphics metadata
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterGraphicsHandleToNodesCtx(
HsaKFDContext *ctx, //IN
HSAuint64 GraphicsResourceHandle, //IN
HsaGraphicsResourceInfo *GraphicsResourceInfo, //OUT
HSAuint64 NumberOfNodes, //IN
HSAuint32* NodeArray //IN
);
/**
Similar to hsaKmtRegisterGraphicsHandleToNodesCtx but provides registration
options via RegisterFlags.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterGraphicsHandleToNodesExtCtx(
HsaKFDContext *ctx, //IN
HSAuint64 GraphicsResourceHandle, //IN
HsaGraphicsResourceInfo *GraphicsResourceInfo, //OUT
HSAuint64 NumberOfNodes, //IN
HSAuint32* NodeArray, //IN
HSA_REGISTER_MEM_FLAGS RegisterFlags //IN
);
/**
* Export a dmabuf handle and offset for a given memory address
*
* Validates that @MemoryAddress belongs to a valid allocation and that the
* @MemorySizeInBytes doesn't exceed the end of that allocation. Returns a
* dmabuf fd of the allocation and the offset of MemoryAddress within that
* allocation. The memory will remain allocated even after the allocation is
* freed by hsaKmtFreeMemory for as long as a dmabuf fd remains open or any
* importer of that fd maintains an active reference to the memory.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtExportDMABufHandleCtx(
HsaKFDContext *ctx, //IN
void *MemoryAddress, //IN
HSAuint64 MemorySizeInBytes, //IN
int *DMABufFd, //OUT
HSAuint64 *Offset //OUT
);
/**
Export a memory buffer for sharing with other processes
NOTE: for the current revision of the thunk spec, SizeInBytes
must match whole allocation.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtShareMemoryCtx(
HsaKFDContext *ctx, //IN
void *MemoryAddress, //IN
HSAuint64 SizeInBytes, //IN
HsaSharedMemoryHandle *SharedMemoryHandle //OUT
);
/**
Register shared memory handle
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterSharedHandleCtx(
HsaKFDContext *ctx, //IN
const HsaSharedMemoryHandle *SharedMemoryHandle, //IN
void **MemoryAddress, //OUT
HSAuint64 *SizeInBytes //OUT
);
/**
Register shared memory handle to specific nodes only.
NumberOfNodes and NodeArray are inputs describing the target nodes;
hsaKmtRegisterSharedHandleCtx() forwards to this call with NumberOfNodes = 0
and NodeArray = NULL.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtRegisterSharedHandleToNodesCtx(
HsaKFDContext *ctx, //IN
const HsaSharedMemoryHandle *SharedMemoryHandle, //IN
void **MemoryAddress, //OUT
HSAuint64 *SizeInBytes, //OUT
HSAuint64 NumberOfNodes, //IN
HSAuint32* NodeArray //IN
);
/**
Unregisters with KFD a memory buffer
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtDeregisterMemoryCtx(
HsaKFDContext *ctx, //IN
void* MemoryAddress //IN
);
/**
Ensures that the memory is resident and can be accessed by GPU
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtMapMemoryToGPUCtx(
HsaKFDContext *ctx, //IN
void* MemoryAddress, //IN (page-aligned)
HSAuint64 MemorySizeInBytes, //IN (page-aligned)
HSAuint64* AlternateVAGPU //OUT (page-aligned)
);
/**
Ensures that the memory is resident and can be accessed by GPUs
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtMapMemoryToGPUNodesCtx(
HsaKFDContext *ctx, //IN
void* MemoryAddress, //IN (page-aligned)
HSAuint64 MemorySizeInBytes, //IN (page-aligned)
HSAuint64* AlternateVAGPU, //OUT (page-aligned)
HsaMemMapFlags MemMapFlags, //IN
HSAuint64 NumberOfNodes, //IN
HSAuint32* NodeArray //IN
);
/**
Releases the residency of the memory
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtUnmapMemoryToGPUCtx(
HsaKFDContext *ctx, //IN
void* MemoryAddress //IN (page-aligned)
);
/**
Stub for Unmap Graphic Handle
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtUnmapGraphicHandleCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HSAuint64 FlatMemoryAddress, //IN
HSAuint64 SizeInBytes //IN
);
/**
* Get an AMDGPU device handle for a GPU node
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetAMDGPUDeviceHandleCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HsaAMDGPUDeviceHandle *DeviceHandle //OUT
);
/**
Sets trap handler and trap buffer to be used for all queues
associated with the specified NodeId within this process context
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetTrapHandlerCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
void* TrapHandlerBaseAddress, //IN
HSAuint64 TrapHandlerSizeInBytes, //IN
void* TrapBufferBaseAddress, //IN
HSAuint64 TrapBufferSizeInBytes //IN
);
/**
Gets image tile configuration.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetTileConfigCtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN
HsaGpuTileConfig *config //IN/OUT
);
/**
Returns information about pointers
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtQueryPointerInfoCtx(
HsaKFDContext *ctx, //IN
const void *Pointer, //IN
HsaPointerInfo *PointerInfo //OUT
);
/**
Associates user data with a memory allocation
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetMemoryUserDataCtx(
HsaKFDContext *ctx, //IN
const void * Pointer, //IN
void * UserData //IN
);
/**
Allocate GWS resource for a queue
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtAllocQueueGWSCtx(
HsaKFDContext *ctx, //IN
HSA_QUEUEID QueueId, //IN
HSAuint32 nGWS, //IN
HSAuint32 *firstGWS //OUT
);
/* Helper functions for calling KFD SVM ioctl */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSVMSetAttrCtx(
HsaKFDContext *ctx, //IN
void *start_addr, //IN: Start of the virtual address range (page-aligned)
HSAuint64 size, //IN: size (page-aligned)
unsigned int nattr, //IN: number of attributes
HSA_SVM_ATTRIBUTE *attrs //IN: array of attributes
);
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSVMGetAttrCtx(
HsaKFDContext *ctx, //IN
void *start_addr, //IN: Start of the virtual address range (page-aligned)
HSAuint64 size, //IN: size (page aligned)
unsigned int nattr, //IN: number of attributes
HSA_SVM_ATTRIBUTE *attrs //IN/OUT: array of attributes
);
/**
Enables or disables XNACK mode.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetXNACKModeCtx(
HsaKFDContext *ctx, //IN
HSAint32 enable //IN: enable/disable XNACK mode.
);
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetXNACKModeCtx(
HsaKFDContext *ctx, //IN
HSAint32 *enable //OUT: returns XNACK value.
);
/**
Opens an anonymous file handle used to enable and read SMI events.
To enable events, write a 64-bit event mask to fd, using the event enums as bit indices;
for example, the mask (HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_INDEX_MAX) - 1) enables all events.
Reading events from fd is non-blocking; use poll with a timeout value to check whether an event is available.
Events are dropped if the kernel event FIFO is full.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtOpenSMICtx(
HsaKFDContext *ctx, //IN
HSAuint32 NodeId, //IN: GPU node_id to receive the SMI event from
int *fd //OUT: anonymous file handle
);
/**
If this is GPU Mapped memory, remap the first page at this address to be normal system memory
This is used in ASAN mode to remap the first page of device memory to share host ASAN logic.
This function is only supported when libhsakmt is compiled in ASAN mode.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtReplaceAsanHeaderPageCtx(
HsaKFDContext *ctx, //IN
void *addr //IN: Start of the virtual address page
);
/**
If this is GPU Mapped memory, remap the first page back to the original GPU memory
This is used in ASAN mode to remap the first page back to its original mapping.
This function is only supported when libhsakmt is compiled in ASAN mode.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtReturnAsanHeaderPageCtx(
HsaKFDContext *ctx, //IN
void *addr //IN: Start of the virtual address page
);
#ifdef __cplusplus
} //extern "C"
#endif
#endif //_HSAKMTCTX_H_
@@ -145,8 +145,8 @@ void model_init_env_vars(void)
abort();
#endif
}
assert(hsakmt_kfd_fd < 0);
hsakmt_kfd_fd = fd;
assert(hsakmt_primary_kfd_ctx.fd < 0);
hsakmt_kfdcontext_init_context(fd, &hsakmt_primary_kfd_ctx);
pthread_condattr_t condattr;
pthread_condattr_init(&condattr);
pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
@@ -193,7 +193,7 @@ static uint64_t allocate_from_memfd(uint64_t size, uint64_t align)
model_memfd_size = (model_memfd_size + align - 1) & ~(align - 1);
uint64_t offset = model_memfd_size;
model_memfd_size += size;
int ret = ftruncate(hsakmt_kfd_fd, model_memfd_size);
int ret = ftruncate(hsakmt_primary_kfd_ctx.fd, model_memfd_size);
if (ret < 0)
{
fprintf(stderr, "model: ftruncate on memfd failed\n");
@@ -269,7 +269,7 @@ void model_init(void)
HSAKMT_STATUS result;
HsaSystemProperties props;
/* Read the topology to determine nodes. */
result = hsakmt_topology_sysfs_get_system_props(&props);
result = hsakmt_topology_sysfs_get_system_props(&hsakmt_primary_kfd_ctx, &props);
if (result != HSAKMT_STATUS_SUCCESS)
{
fprintf(stderr, "model: Failed to parse topology\n");
@@ -503,7 +503,7 @@ static int model_kfd_ioctl_locked(unsigned long request, void *arg)
// unclear whether the current implementation causes kernel data
// structures to grow. But in practice, it almost certainly never
// matters.
int ret = fallocate(hsakmt_kfd_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
int ret = fallocate(hsakmt_primary_kfd_ctx.fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
mem_data->file_offset, mem_data->size);
if (ret != 0)
{
@@ -539,7 +539,7 @@ static int model_kfd_ioctl_locked(unsigned long request, void *arg)
pr_debug("MODEL IOCTL: AMDKFD_IOC_MAP_MEMORY_TO_GPU: VA: %lx : Size: %lu, Flags: %x\n", mem_data->va_addr, mem_data->size, mem_data->flags);
void *ret = mmap(VOID_PTR_ADD(model_nodes[node_id].aperture, mem_data->va_addr),
mem_data->size, prot,
MAP_SHARED | MAP_FIXED, hsakmt_kfd_fd, mem_data->file_offset);
MAP_SHARED | MAP_FIXED, hsakmt_primary_kfd_ctx.fd, mem_data->file_offset);
if (ret == MAP_FAILED)
{
fprintf(stderr, "model: mmap failed\n");
@@ -767,7 +767,7 @@ static int model_kfd_ioctl_locked(unsigned long request, void *arg)
model_functions->register_queue(model_nodes[node_id].model, &info);
model_queues[queue_id].node_id = node_id;
args->queue_id = queue_id;
// Note that strictly speaking, this is the offset into the hsakmt_kfd_fd
// Note that strictly speaking, this is the offset into the hsakmt_primary_kfd_ctx.fd
// file, not the DRM fd (but they are the same in our case).
args->doorbell_offset = model_nodes[node_id].doorbell_offset + 8 * queue_id;
return 0;
@@ -0,0 +1,63 @@
/*
* Copyright © 2025 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "kfdcontext.h"
#include "libhsakmt.h"
#include <stdlib.h>
#include <stddef.h>
#include <assert.h>
#include <stdio.h>
#include <errno.h>
/*
 * Bind a caller-allocated HsaKFDContext to an already-open KFD descriptor.
 *
 * fd:  descriptor to store in the context; asserted to be non-negative.
 * ctx: context to initialise; asserted to be non-NULL.
 *
 * Every member is overwritten: fd is recorded and the queue, FMM and event
 * sub-context pointers start out as NULL.
 */
void hsakmt_kfdcontext_init_context(int fd, HsaKFDContext *ctx)
{
	assert(fd >= 0);
	assert(ctx);

	/* One assignment covers all members of the structure. */
	*ctx = (HsaKFDContext) {
		.fd = fd,
		.queue_context = NULL,
		.fmm_context = NULL,
		.event_context = NULL,
	};
}
/*
 * Tear down the sub-contexts owned by a KFD context and mark it unused.
 *
 * ctx: context to clear; a NULL pointer is accepted and ignored.
 *
 * The queue, FMM and event sub-contexts are released with free() and their
 * pointers reset to NULL, then fd is set to -1. The descriptor itself is not
 * closed here; only the stored value is invalidated.
 */
void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx)
{
	if (!ctx)
		return;

	/* free(NULL) is a no-op, so the pointers need no individual guards. */
	free(ctx->queue_context);
	ctx->queue_context = NULL;

	free(ctx->fmm_context);
	ctx->fmm_context = NULL;

	free(ctx->event_context);
	ctx->event_context = NULL;

	ctx->fd = -1;
}
@@ -0,0 +1,74 @@
/*
* Copyright © 2025 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _KFDCONTEXT_H_
#define _KFDCONTEXT_H_
#include <stdint.h>
struct hsa_kfd_queue_context;
struct hsa_kfd_fmm_context;
struct hsa_kfd_event_context;
/*
* HsaKFDContext
*
* Represents the execution context for a connection to the Kernel Fusion Driver (KFD).
*
* This structure encapsulates all state required to manage a KFD session, including:
* - The file descriptor associated with the open KFD device
* - Related resources tied to this file descriptor
*
* Multiple HsaKFDContext instances can coexist simultaneously, each maintaining its own
* independent set of resources. These contexts are fully isolated from one another and
* must not have their resources mixed. For example, memory resources created in
* context A cannot be used in context B directly. If resources need to be shared between
* contexts, they must be explicitly exported and imported using the appropriate APIs.
*/
/*
 * Lifecycle, as implemented in kfdcontext.c:
 *  - hsakmt_kfdcontext_init_context() stores the descriptor and sets the three
 *    sub-context pointers to NULL;
 *  - hsakmt_kfdcontext_clear_context() free()s the sub-contexts, resets the
 *    pointers to NULL and sets fd to -1.
 * The sub-context types are only forward-declared in this header, so they are
 * opaque to code that includes nothing else.
 */
typedef struct _HsaKFDContext
{
/* File descriptor for the KFD device; set to -1 by hsakmt_kfdcontext_clear_context() */
int fd;
/* Queue context for managing user queues; NULL right after initialisation */
struct hsa_kfd_queue_context *queue_context;
/* Memory management context for managing memory; NULL right after initialisation */
struct hsa_kfd_fmm_context *fmm_context;
/* Event context for managing events; NULL right after initialisation */
struct hsa_kfd_event_context *event_context;
} HsaKFDContext;
// Initialize a pre-allocated HsaKFDContext with the given file descriptor
void hsakmt_kfdcontext_init_context(int fd, HsaKFDContext *ctx);
// Free the queue, FMM and event sub-contexts of the given KFD context, reset
// their pointers to NULL and set fd to -1. A NULL ctx is ignored. The file
// descriptor is not closed by this call.
void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx);
// Accessors for the per-context sub-state held in HsaKFDContext.
// NOTE(review): their definitions are not visible alongside init/clear; presumably
// each returns the matching *_context member (possibly creating it on first use) -
// confirm against the implementation.
struct hsa_kfd_fmm_context *hsakmt_kfdcontext_get_fmm_context(HsaKFDContext *ctx);
struct hsa_kfd_queue_context *hsakmt_kfdcontext_get_queue_context(HsaKFDContext *ctx);
struct hsa_kfd_event_context *hsakmt_kfdcontext_get_event_context(HsaKFDContext *ctx);
#endif /* _KFDCONTEXT_H_ */
+15 -10
View File
@@ -28,11 +28,12 @@
#include "hsakmt/linux/kfd_ioctl.h"
#include "hsakmt/hsakmt.h"
#include "kfdcontext.h"
#include "hsakmtctx.h"
#include <pthread.h>
#include <stdint.h>
#include <limits.h>
extern int hsakmt_kfd_fd;
extern int hsakmt_udmabuf_dev_fd;
extern unsigned long hsakmt_kfd_open_count;
extern bool hsakmt_forked;
@@ -42,6 +43,7 @@ extern bool hsakmt_is_svm_api_supported;
extern int hsakmt_zfb_support;
extern HsaVersionInfo hsakmt_kfd_version_info;
extern HsaKFDContext hsakmt_primary_kfd_ctx;
#undef HSAKMTAPI
#define HSAKMTAPI __attribute__((visibility ("default")))
@@ -196,7 +198,7 @@ int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id);
HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
uint32_t NumberOfNodes, uint32_t *NodeArray);
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaSystemProperties *props);
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx, HsaSystemProperties *props);
HSAKMT_STATUS hsakmt_topology_get_node_props(HSAuint32 NodeId,
HsaNodeProperties *NodeProperties);
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HSAuint32 NodeId,
@@ -207,13 +209,16 @@ bool hsakmt_topology_is_svm_needed(HSA_ENGINE_ID EngineId);
HSAuint32 hsakmt_PageSizeFromFlags(unsigned int pageSizeFlags);
void* hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align,
void* hsakmt_allocate_exec_aligned_memory_gpu(HsaKFDContext *ctx,
uint32_t size, uint32_t align,
uint32_t gpu_id,
uint32_t NodeId, bool NonPaged,
bool DeviceLocal, bool Uncached);
void hsakmt_free_exec_aligned_memory_gpu(void *addr, uint32_t size, uint32_t align);
HSAKMT_STATUS hsakmt_init_process_doorbells(unsigned int NumNodes);
void hsakmt_destroy_process_doorbells(void);
void hsakmt_free_exec_aligned_memory_gpu(HsaKFDContext *ctx,
void *addr, uint32_t size, uint32_t align);
HSAKMT_STATUS hsakmt_init_process_doorbells(HsaKFDContext *ctx,
unsigned int NumNodes);
void hsakmt_destroy_process_doorbells(HsaKFDContext *ctx);
HSAKMT_STATUS hsakmt_init_device_debugging_memory(unsigned int NumNodes);
void hsakmt_destroy_device_debugging_memory(void);
bool hsakmt_debug_get_reg_status(uint32_t node_id);
@@ -239,10 +244,10 @@ extern int hsakmt_ioctl(int fd, unsigned long request, void *arg);
#define POWER_OF_2(x) ((x && (!(x & (x - 1)))) ? 1 : 0)
void hsakmt_clear_events_page(void);
void hsakmt_fmm_clear_all_mem(void);
void hsakmt_fmm_clear_all_aperture(void);
void hsakmt_clear_process_doorbells(void);
void hsakmt_clear_events_page(HsaKFDContext *ctx);
void hsakmt_fmm_clear_all_mem(HsaKFDContext *ctx);
void hsakmt_fmm_clear_all_aperture(HsaKFDContext *ctx);
void hsakmt_clear_process_doorbells(HsaKFDContext *ctx);
uint32_t hsakmt_get_num_sysfs_nodes(void);
bool hsakmt_is_forked_child(void);
+301 -62
View File
@@ -34,7 +34,8 @@
#include <fcntl.h>
#include "fmm.h"
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node,
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicyCtx(HsaKFDContext *ctx,
HSAuint32 Node,
HSAuint32 DefaultPolicy,
HSAuint32 AlternatePolicy,
void *MemoryAddressAlternate,
@@ -86,7 +87,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node,
args.alternate_aperture_base = (uintptr_t) MemoryAddressAlternate;
args.alternate_aperture_size = MemorySizeInBytes;
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &args);
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_MEMORY_POLICY, &args);
return (err == -1) ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}
@@ -104,15 +105,17 @@ HSAuint32 hsakmt_PageSizeFromFlags(unsigned int pageSizeFlags)
}
}
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemory(HSAuint32 PreferredNode,
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryCtx(HsaKFDContext *ctx,
HSAuint32 PreferredNode,
HSAuint64 SizeInBytes,
HsaMemFlags MemFlags,
void **MemoryAddress)
{
return hsaKmtAllocMemoryAlign(PreferredNode, SizeInBytes, 0, MemFlags, MemoryAddress);
return hsaKmtAllocMemoryAlignCtx(ctx, PreferredNode, SizeInBytes, 0, MemFlags, MemoryAddress);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlignCtx(HsaKFDContext *ctx,
HSAuint32 PreferredNode,
HSAuint64 SizeInBytes,
HSAuint64 Alignment,
HsaMemFlags MemFlags,
@@ -160,7 +163,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
return HSAKMT_STATUS_NOT_IMPLEMENTED;
}
*MemoryAddress = hsakmt_fmm_allocate_scratch(gpu_id, *MemoryAddress, SizeInBytes);
*MemoryAddress = hsakmt_fmm_allocate_scratch(ctx, gpu_id, *MemoryAddress, SizeInBytes);
if (!(*MemoryAddress)) {
pr_err("[%s] failed to allocate %lu bytes from scratch\n",
@@ -183,7 +186,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
if (hsakmt_zfb_support && gpu_id && MemFlags.ui32.NonPaged == 1)
MemFlags.ui32.CoarseGrain = 1;
*MemoryAddress = hsakmt_fmm_allocate_host(gpu_id, MemFlags.ui32.GTTAccess ? 0 : PreferredNode,
*MemoryAddress = hsakmt_fmm_allocate_host(ctx, gpu_id, MemFlags.ui32.GTTAccess ? 0 : PreferredNode,
*MemoryAddress, SizeInBytes, Alignment, MemFlags);
if (!(*MemoryAddress)) {
@@ -204,7 +207,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
return HSAKMT_STATUS_INVALID_PARAMETER;
}
*MemoryAddress = hsakmt_fmm_allocate_device(gpu_id, PreferredNode, *MemoryAddress,
*MemoryAddress = hsakmt_fmm_allocate_device(ctx, gpu_id, PreferredNode, *MemoryAddress,
SizeInBytes, Alignment, MemFlags);
if (!(*MemoryAddress)) {
@@ -218,7 +221,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
}
HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress,
HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemoryCtx(HsaKFDContext *ctx,
void *MemoryAddress,
HSAuint64 SizeInBytes)
{
CHECK_KFD_OPEN();
@@ -230,11 +234,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress,
return HSAKMT_STATUS_ERROR;
}
return hsakmt_fmm_release(MemoryAddress);
return hsakmt_fmm_release(ctx, MemoryAddress);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemory(HSAuint32 Node,
HSAuint64 *AvailableBytes)
HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemoryCtx(HsaKFDContext *ctx,
HSAuint32 Node,
HSAuint64 *AvailableBytes)
{
struct kfd_ioctl_get_available_memory_args args = {};
HSAKMT_STATUS result;
@@ -250,14 +255,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemory(HSAuint32 Node,
return result;
}
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args))
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args))
return HSAKMT_STATUS_ERROR;
*AvailableBytes = args.available;
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress,
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryCtx(HsaKFDContext *ctx,
void *MemoryAddress,
HSAuint64 MemorySizeInBytes)
{
CHECK_KFD_OPEN();
@@ -271,11 +277,13 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress,
HsaMemFlags flags;
flags.ui32.CoarseGrain = 1;
flags.ui32.ExtendedCoherent = 0;
return hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
return hsakmt_fmm_register_memory(ctx,
MemoryAddress, MemorySizeInBytes,
NULL, 0, flags);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodesCtx(HsaKFDContext *ctx,
void *MemoryAddress,
HSAuint64 MemorySizeInBytes,
HSAuint64 NumberOfNodes,
HSAuint32 *NodeArray)
@@ -299,7 +307,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
flags.ui32.CoarseGrain = 1;
flags.ui32.ExtendedCoherent = 0;
ret = hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
ret = hsakmt_fmm_register_memory(ctx,
MemoryAddress, MemorySizeInBytes,
gpu_id_array,
NumberOfNodes*sizeof(uint32_t),
flags);
@@ -310,7 +319,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
return ret;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags(void *MemoryAddress,
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlagsCtx(HsaKFDContext *ctx,
void *MemoryAddress,
HSAuint64 MemorySizeInBytes,
HsaMemFlags MemFlags)
{
@@ -331,21 +341,24 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags(void *MemoryAddress,
/* TODO: support mixed APU and dGPU configurations */
return HSAKMT_STATUS_NOT_SUPPORTED;
ret = hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
ret = hsakmt_fmm_register_memory(ctx,
MemoryAddress, MemorySizeInBytes,
NULL, 0, MemFlags);
return ret;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsResourceHandle,
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesCtx(HsaKFDContext *ctx,
HSAuint64 GraphicsResourceHandle,
HsaGraphicsResourceInfo *GraphicsResourceInfo,
HSAuint64 NumberOfNodes,
HSAuint32 *NodeArray)
{
HSA_REGISTER_MEM_FLAGS regFlags;
regFlags.Value = 0;
return hsaKmtRegisterGraphicsHandleToNodesExt(GraphicsResourceHandle,
return hsaKmtRegisterGraphicsHandleToNodesExtCtx(ctx,
GraphicsResourceHandle,
GraphicsResourceInfo,
NumberOfNodes,
NodeArray,
@@ -353,7 +366,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsRe
}
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 GraphicsResourceHandle,
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExtCtx(HsaKFDContext *ctx,
HSAuint64 GraphicsResourceHandle,
HsaGraphicsResourceInfo *GraphicsResourceInfo,
HSAuint64 NumberOfNodes,
HSAuint32 *NodeArray,
@@ -371,7 +385,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 Graphic
}
if (ret == HSAKMT_STATUS_SUCCESS) {
ret = hsakmt_fmm_register_graphics_handle(
ret = hsakmt_fmm_register_graphics_handle(ctx,
GraphicsResourceHandle, GraphicsResourceInfo,
gpu_id_array, NumberOfNodes * sizeof(uint32_t), RegisterFlags);
if (ret != HSAKMT_STATUS_SUCCESS)
@@ -381,7 +395,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 Graphic
return ret;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress,
HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandleCtx(HsaKFDContext *ctx,
void *MemoryAddress,
HSAuint64 MemorySizeInBytes,
int *DMABufFd,
HSAuint64 *Offset)
@@ -391,11 +406,13 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress,
pr_debug("[%s] address %p\n", __func__, MemoryAddress);
return hsakmt_fmm_export_dma_buf_fd(MemoryAddress, MemorySizeInBytes,
return hsakmt_fmm_export_dma_buf_fd(ctx,
MemoryAddress, MemorySizeInBytes,
DMABufFd, Offset);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress,
HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemoryCtx(HsaKFDContext *ctx,
void *MemoryAddress,
HSAuint64 SizeInBytes,
HsaSharedMemoryHandle *SharedMemoryHandle)
{
@@ -406,25 +423,28 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress,
if (!SharedMemoryHandle)
return HSAKMT_STATUS_INVALID_PARAMETER;
return hsakmt_fmm_share_memory(MemoryAddress, SizeInBytes, SharedMemoryHandle);
return hsakmt_fmm_share_memory(ctx, MemoryAddress, SizeInBytes, SharedMemoryHandle);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandle(const HsaSharedMemoryHandle *SharedMemoryHandle,
void **MemoryAddress,
HSAuint64 *SizeInBytes)
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleCtx(HsaKFDContext *ctx,
const HsaSharedMemoryHandle *SharedMemoryHandle,
void **MemoryAddress,
HSAuint64 *SizeInBytes)
{
CHECK_KFD_OPEN();
pr_debug("[%s] handle %p\n", __func__, SharedMemoryHandle);
return hsaKmtRegisterSharedHandleToNodes(SharedMemoryHandle,
return hsaKmtRegisterSharedHandleToNodesCtx(ctx,
SharedMemoryHandle,
MemoryAddress,
SizeInBytes,
0,
NULL);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodes(const HsaSharedMemoryHandle *SharedMemoryHandle,
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodesCtx(HsaKFDContext *ctx,
const HsaSharedMemoryHandle *SharedMemoryHandle,
void **MemoryAddress,
HSAuint64 *SizeInBytes,
HSAuint64 NumberOfNodes,
@@ -447,7 +467,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodes(const HsaSharedMemoryH
goto error;
}
ret = hsakmt_fmm_register_shared_memory(SharedMemoryHandle,
ret = hsakmt_fmm_register_shared_memory(ctx,
SharedMemoryHandle,
SizeInBytes,
MemoryAddress,
gpu_id_array,
@@ -487,17 +508,17 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtProcessVMWrite(HSAuint32 Pid,
return HSAKMT_STATUS_NOT_IMPLEMENTED;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemory(void *MemoryAddress)
HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemoryCtx(HsaKFDContext *ctx, void *MemoryAddress)
{
CHECK_KFD_OPEN();
pr_debug("[%s] address %p\n", __func__, MemoryAddress);
return hsakmt_fmm_deregister_memory(MemoryAddress);
return hsakmt_fmm_deregister_memory(ctx, MemoryAddress);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(void *MemoryAddress,
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUCtx(HsaKFDContext *ctx,
void *MemoryAddress,
HSAuint64 MemorySizeInBytes,
HSAuint64 *AlternateVAGPU)
{
@@ -513,10 +534,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(void *MemoryAddress,
if (AlternateVAGPU)
*AlternateVAGPU = 0;
return hsakmt_fmm_map_to_gpu(MemoryAddress, MemorySizeInBytes, AlternateVAGPU);
return hsakmt_fmm_map_to_gpu(ctx, MemoryAddress, MemorySizeInBytes, AlternateVAGPU);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(void *MemoryAddress,
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodesCtx(HsaKFDContext *ctx,
void *MemoryAddress,
HSAuint64 MemorySizeInBytes,
HSAuint64 *AlternateVAGPU,
HsaMemMapFlags MemMapFlags,
@@ -537,16 +559,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(void *MemoryAddress,
}
if (!hsakmt_is_dgpu && NumberOfNodes == 1)
return hsaKmtMapMemoryToGPU(MemoryAddress,
MemorySizeInBytes,
AlternateVAGPU);
return hsaKmtMapMemoryToGPUCtx(ctx, MemoryAddress,
MemorySizeInBytes, AlternateVAGPU);
ret = hsakmt_validate_nodeid_array(&gpu_id_array,
NumberOfNodes, NodeArray);
if (ret != HSAKMT_STATUS_SUCCESS)
return ret;
ret = hsakmt_fmm_map_to_gpu_nodes(MemoryAddress, MemorySizeInBytes,
ret = hsakmt_fmm_map_to_gpu_nodes(ctx, MemoryAddress, MemorySizeInBytes,
gpu_id_array, NumberOfNodes, AlternateVAGPU);
if (gpu_id_array)
@@ -555,7 +576,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(void *MemoryAddress,
return ret;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(void *MemoryAddress)
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPUCtx(HsaKFDContext *ctx, void *MemoryAddress)
{
CHECK_KFD_OPEN();
@@ -567,7 +588,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(void *MemoryAddress)
return HSAKMT_STATUS_SUCCESS;
}
if (!hsakmt_fmm_unmap_from_gpu(MemoryAddress))
if (!hsakmt_fmm_unmap_from_gpu(ctx, MemoryAddress))
return HSAKMT_STATUS_SUCCESS;
else
return HSAKMT_STATUS_ERROR;
@@ -588,16 +609,16 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtMapGraphicHandle(HSAuint32 NodeId,
return HSAKMT_STATUS_NOT_IMPLEMENTED;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapGraphicHandle(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapGraphicHandleCtx(HsaKFDContext *ctx,
HSAuint32 NodeId,
HSAuint64 FlatMemoryAddress,
HSAuint64 SizeInBytes)
{
CHECK_KFD_OPEN();
return hsaKmtUnmapMemoryToGPU(PORT_UINT64_TO_VPTR(FlatMemoryAddress));
return hsaKmtUnmapMemoryToGPUCtx(ctx, PORT_UINT64_TO_VPTR(FlatMemoryAddress));
}
HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *config)
HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfigCtx(HsaKFDContext *ctx,
HSAuint32 NodeId, HsaGpuTileConfig *config)
{
struct kfd_ioctl_get_tile_config_args args = {0};
uint32_t gpu_id;
@@ -623,7 +644,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *
args.num_tile_configs = config->NumTileConfigs;
args.num_macro_tile_configs = config->NumMacroTileConfigs;
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_TILE_CONFIG, &args) != 0)
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_GET_TILE_CONFIG, &args) != 0)
return HSAKMT_STATUS_ERROR;
config->NumTileConfigs = args.num_tile_configs;
@@ -637,7 +658,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfo(const void *Pointer,
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfoCtx(HsaKFDContext *ctx,
const void *Pointer,
HsaPointerInfo *PointerInfo)
{
CHECK_KFD_OPEN();
@@ -646,47 +668,264 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfo(const void *Pointer,
if (!PointerInfo)
return HSAKMT_STATUS_INVALID_PARAMETER;
return hsakmt_fmm_get_mem_info(Pointer, PointerInfo);
return hsakmt_fmm_get_mem_info(ctx, Pointer, PointerInfo);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryUserData(const void *Pointer,
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryUserDataCtx(HsaKFDContext *ctx,
const void *Pointer,
void *UserData)
{
CHECK_KFD_OPEN();
pr_debug("[%s] pointer %p\n", __func__, Pointer);
return hsakmt_fmm_set_mem_user_data(Pointer, UserData);
return hsakmt_fmm_set_mem_user_data(ctx, Pointer, UserData);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtReplaceAsanHeaderPage(void *addr)
HSAKMT_STATUS HSAKMTAPI hsaKmtReplaceAsanHeaderPageCtx(HsaKFDContext *ctx, void *addr)
{
#ifdef SANITIZER_AMDGPU
pr_debug("[%s] address %p\n", __func__, addr);
CHECK_KFD_OPEN();
return hsakmt_fmm_replace_asan_header_page(addr);
return hsakmt_fmm_replace_asan_header_page(ctx, addr);
#else
return HSAKMT_STATUS_NOT_SUPPORTED;
#endif
}
HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPage(void *addr)
HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPageCtx(HsaKFDContext *ctx, void *addr)
{
#ifdef SANITIZER_AMDGPU
pr_debug("[%s] address %p\n", __func__, addr);
CHECK_KFD_OPEN();
return hsakmt_fmm_return_asan_header_page(addr);
return hsakmt_fmm_return_asan_header_page(ctx, addr);
#else
return HSAKMT_STATUS_NOT_SUPPORTED;
#endif
}
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle( HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandleCtx(HsaKFDContext *ctx,
HSAuint32 NodeId,
HsaAMDGPUDeviceHandle *DeviceHandle)
{
CHECK_KFD_OPEN();
return hsakmt_fmm_get_amdgpu_device_handle(NodeId, DeviceHandle);
return hsakmt_fmm_get_amdgpu_device_handle(ctx, NodeId, DeviceHandle);
}
/*
 * Legacy entry point: forwards every argument to hsaKmtSetMemoryPolicyCtx()
 * on the global primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node,
					      HSAuint32 DefaultPolicy,
					      HSAuint32 AlternatePolicy,
					      void *MemoryAddressAlternate,
					      HSAuint64 MemorySizeInBytes)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtSetMemoryPolicyCtx(primary, Node, DefaultPolicy,
					AlternatePolicy, MemoryAddressAlternate,
					MemorySizeInBytes);
}
/*
 * Legacy allocation entry point without an alignment argument: delegates to
 * hsaKmtAllocMemoryAlign() with an Alignment of 0.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemory(HSAuint32 PreferredNode,
					  HSAuint64 SizeInBytes,
					  HsaMemFlags MemFlags,
					  void **MemoryAddress)
{
	const HSAuint64 no_alignment = 0;

	return hsaKmtAllocMemoryAlign(PreferredNode, SizeInBytes, no_alignment,
				      MemFlags, MemoryAddress);
}
/*
 * Legacy entry point: forwards every argument to hsaKmtAllocMemoryAlignCtx()
 * on the global primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
					       HSAuint64 SizeInBytes,
					       HSAuint64 Alignment,
					       HsaMemFlags MemFlags,
					       void **MemoryAddress)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtAllocMemoryAlignCtx(primary, PreferredNode, SizeInBytes,
					 Alignment, MemFlags, MemoryAddress);
}
/*
 * Legacy entry point: forwards to hsaKmtFreeMemoryCtx() on the global
 * primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress,
					 HSAuint64 SizeInBytes)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtFreeMemoryCtx(primary, MemoryAddress, SizeInBytes);
}
/*
 * Legacy entry point: forwards to hsaKmtAvailableMemoryCtx() on the global
 * primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemory(HSAuint32 Node,
					      HSAuint64 *AvailableBytes)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtAvailableMemoryCtx(primary, Node, AvailableBytes);
}
/*
 * Legacy entry point: forwards to hsaKmtRegisterMemoryCtx() on the global
 * primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress,
					     HSAuint64 MemorySizeInBytes)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRegisterMemoryCtx(primary, MemoryAddress, MemorySizeInBytes);
}
/*
 * Legacy entry point: forwards to hsaKmtRegisterMemoryToNodesCtx() on the
 * global primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
						    HSAuint64 MemorySizeInBytes,
						    HSAuint64 NumberOfNodes,
						    HSAuint32 *NodeArray)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRegisterMemoryToNodesCtx(primary, MemoryAddress,
					      MemorySizeInBytes, NumberOfNodes,
					      NodeArray);
}
/*
 * Legacy entry point: forwards to hsaKmtRegisterMemoryWithFlagsCtx() on the
 * global primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags(void *MemoryAddress,
						      HSAuint64 MemorySizeInBytes,
						      HsaMemFlags MemFlags)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRegisterMemoryWithFlagsCtx(primary, MemoryAddress,
						MemorySizeInBytes, MemFlags);
}
/*
 * Legacy entry point without register flags: calls
 * hsaKmtRegisterGraphicsHandleToNodesExt() with a flags value whose
 * .Value member is set to 0.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsResourceHandle,
							    HsaGraphicsResourceInfo *GraphicsResourceInfo,
							    HSAuint64 NumberOfNodes,
							    HSAuint32 *NodeArray)
{
	HSA_REGISTER_MEM_FLAGS no_flags;

	no_flags.Value = 0;

	return hsaKmtRegisterGraphicsHandleToNodesExt(GraphicsResourceHandle,
						      GraphicsResourceInfo,
						      NumberOfNodes, NodeArray,
						      no_flags);
}
/*
 * Legacy entry point: forwards to
 * hsaKmtRegisterGraphicsHandleToNodesExtCtx() on the global primary context
 * hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 GraphicsResourceHandle,
							       HsaGraphicsResourceInfo *GraphicsResourceInfo,
							       HSAuint64 NumberOfNodes,
							       HSAuint32 *NodeArray,
							       HSA_REGISTER_MEM_FLAGS RegisterFlags)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRegisterGraphicsHandleToNodesExtCtx(primary,
							 GraphicsResourceHandle,
							 GraphicsResourceInfo,
							 NumberOfNodes, NodeArray,
							 RegisterFlags);
}
/*
 * Legacy entry point: forwards to hsaKmtExportDMABufHandleCtx() on the
 * global primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress,
						 HSAuint64 MemorySizeInBytes,
						 int *DMABufFd,
						 HSAuint64 *Offset)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtExportDMABufHandleCtx(primary, MemoryAddress,
					   MemorySizeInBytes, DMABufFd, Offset);
}
/*
 * Legacy entry point: forwards to hsaKmtShareMemoryCtx() on the global
 * primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress,
					  HSAuint64 SizeInBytes,
					  HsaSharedMemoryHandle *SharedMemoryHandle)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtShareMemoryCtx(primary, MemoryAddress, SizeInBytes,
				    SharedMemoryHandle);
}
/*
 * Legacy entry point: forwards to hsaKmtRegisterSharedHandleCtx() on the
 * global primary context hsakmt_primary_kfd_ctx.
 *
 * The Ctx variant already performs CHECK_KFD_OPEN(), logs the handle and
 * registers with a node count of 0 and a NULL node array, so that logic is
 * no longer duplicated here. The debug line is therefore emitted under the
 * Ctx function's name, as for the other legacy wrappers in this file.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandle(
				const HsaSharedMemoryHandle *SharedMemoryHandle,
				void **MemoryAddress,
				HSAuint64 *SizeInBytes)
{
	return hsaKmtRegisterSharedHandleCtx(&hsakmt_primary_kfd_ctx,
					     SharedMemoryHandle,
					     MemoryAddress,
					     SizeInBytes);
}
/*
 * Legacy entry point: forwards to hsaKmtRegisterSharedHandleToNodesCtx() on
 * the global primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodes(const HsaSharedMemoryHandle *SharedMemoryHandle,
							  void **MemoryAddress,
							  HSAuint64 *SizeInBytes,
							  HSAuint64 NumberOfNodes,
							  HSAuint32 *NodeArray)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRegisterSharedHandleToNodesCtx(primary, SharedMemoryHandle,
						    MemoryAddress, SizeInBytes,
						    NumberOfNodes, NodeArray);
}
/*
 * Legacy entry point: forwards to hsaKmtDeregisterMemoryCtx() on the global
 * primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemory(void *MemoryAddress)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDeregisterMemoryCtx(primary, MemoryAddress);
}
/*
 * Legacy entry point: forwards to hsaKmtMapMemoryToGPUCtx() on the global
 * primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(void *MemoryAddress,
					     HSAuint64 MemorySizeInBytes,
					     HSAuint64 *AlternateVAGPU)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtMapMemoryToGPUCtx(primary, MemoryAddress,
				       MemorySizeInBytes, AlternateVAGPU);
}
/*
 * Legacy entry point: forwards to hsaKmtMapMemoryToGPUNodesCtx() on the
 * global primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(
				void *MemoryAddress,
				HSAuint64 MemorySizeInBytes,
				HSAuint64 *AlternateVAGPU,
				HsaMemMapFlags MemMapFlags,
				HSAuint64 NumberOfNodes,
				HSAuint32 *NodeArray)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtMapMemoryToGPUNodesCtx(primary, MemoryAddress,
					    MemorySizeInBytes, AlternateVAGPU,
					    MemMapFlags, NumberOfNodes,
					    NodeArray);
}
/*
 * Legacy entry point: forwards to hsaKmtUnmapMemoryToGPUCtx() on the global
 * primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(void *MemoryAddress)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtUnmapMemoryToGPUCtx(primary, MemoryAddress);
}
/*
 * Legacy entry point: forwards to hsaKmtUnmapGraphicHandleCtx() on the
 * global primary context hsakmt_primary_kfd_ctx.
 *
 * Routing through the Ctx counterpart (rather than calling
 * hsaKmtUnmapMemoryToGPU() directly) keeps this wrapper in line with the
 * other legacy wrappers, so the address conversion lives in one place.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapGraphicHandle(HSAuint32 NodeId,
						 HSAuint64 FlatMemoryAddress,
						 HSAuint64 SizeInBytes)
{
	return hsaKmtUnmapGraphicHandleCtx(&hsakmt_primary_kfd_ctx, NodeId,
					   FlatMemoryAddress, SizeInBytes);
}
/*
 * Legacy entry point: forwards to hsaKmtGetTileConfigCtx() on the global
 * primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *config)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtGetTileConfigCtx(primary, NodeId, config);
}
/*
 * Legacy entry point: forwards to hsaKmtQueryPointerInfoCtx() on the global
 * primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfo(const void *Pointer,
					       HsaPointerInfo *PointerInfo)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtQueryPointerInfoCtx(primary, Pointer, PointerInfo);
}
/*
 * Legacy entry point: forwards to hsaKmtSetMemoryUserDataCtx() on the global
 * primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryUserData(const void *Pointer,
						void *UserData)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtSetMemoryUserDataCtx(primary, Pointer, UserData);
}
/*
 * Legacy entry point: forwards to hsaKmtReplaceAsanHeaderPageCtx() on the
 * global primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReplaceAsanHeaderPage(void *addr)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtReplaceAsanHeaderPageCtx(primary, addr);
}
/*
 * Legacy entry point: forwards to hsaKmtReturnAsanHeaderPageCtx() on the
 * global primary context hsakmt_primary_kfd_ctx.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPage(void *addr)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtReturnAsanHeaderPageCtx(primary, addr);
}
/*
 * Legacy entry point: forwards to hsaKmtGetAMDGPUDeviceHandleCtx() on the
 * global primary context hsakmt_primary_kfd_ctx.
 *
 * No CHECK_KFD_OPEN() here: the Ctx variant runs it as its first statement,
 * so repeating it in the wrapper was redundant and out of line with the
 * other legacy wrappers.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle(HSAuint32 NodeId,
						    HsaAMDGPUDeviceHandle *DeviceHandle)
{
	return hsaKmtGetAMDGPUDeviceHandleCtx(&hsakmt_primary_kfd_ctx, NodeId, DeviceHandle);
}
+33 -17
View File
@@ -51,6 +51,8 @@ static pid_t parent_pid = -1;
int hsakmt_debug_level;
bool hsakmt_forked;
/* Primary KFD context used by the legacy (non-Ctx) API entry points.
 * fd starts at -1; hsaKmtOpenKFDCtx() opens the KFD device only while
 * this fd is negative.
 */
HsaKFDContext hsakmt_primary_kfd_ctx = {.fd = -1};
/* hsakmt_is_forked_child detects when the process has forked since the last
* time this function was called. We cannot rely on pthread_atfork
* because the process can fork without calling the fork function in
@@ -99,16 +101,18 @@ static void child_fork_handler(void)
* The topology information duplicated from the parent is still valid
* in the child process, so it is not cleared
*/
static void clear_after_fork(void)
static void clear_after_fork(HsaKFDContext *ctx)
{
hsakmt_clear_process_doorbells();
hsakmt_clear_events_page();
hsakmt_fmm_clear_all_mem();
hsakmt_clear_process_doorbells(ctx);
hsakmt_clear_events_page(ctx);
hsakmt_fmm_clear_all_mem(ctx);
hsakmt_destroy_device_debugging_memory();
if (hsakmt_kfd_fd) {
close(hsakmt_kfd_fd);
hsakmt_kfd_fd = -1;
}
int fd = ctx->fd;
if (fd >= 0) {
hsakmt_kfdcontext_clear_context(ctx);
close(fd);
}
if (hsakmt_udmabuf_dev_fd > 0) {
close(hsakmt_udmabuf_dev_fd);
hsakmt_udmabuf_dev_fd = -1;
@@ -150,7 +154,7 @@ static HSAKMT_STATUS init_vars_from_env(void)
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFDCtx(HsaKFDContext **pCtx)
{
HSAKMT_STATUS result;
int fd = -1;
@@ -166,7 +170,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
* belong to the parent
*/
if (hsakmt_is_forked_child())
clear_after_fork();
clear_after_fork(&hsakmt_primary_kfd_ctx);
if (hsakmt_kfd_open_count == 0) {
static bool atfork_installed = false;
@@ -184,15 +188,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
// Check if we are using the hsakmtmodel and setup initial state
model_init_env_vars();
if (hsakmt_kfd_fd < 0 && !hsakmt_use_model) {
if (hsakmt_primary_kfd_ctx.fd < 0 && !hsakmt_use_model) {
fd = open(kfd_device_name, O_RDWR | O_CLOEXEC);
if (fd == -1) {
result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;
goto open_failed;
}
hsakmt_kfd_fd = fd;
hsakmt_kfdcontext_init_context(fd, &hsakmt_primary_kfd_ctx);
}
init_page_size();
@@ -216,8 +219,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
useSvmStr = getenv("HSA_USE_SVM");
hsakmt_is_svm_api_supported = !(useSvmStr && !strcmp(useSvmStr, "0"));
if(!hsakmt_use_model)
result = hsakmt_topology_sysfs_get_system_props(&sys_props);
result = hsakmt_topology_sysfs_get_system_props(&hsakmt_primary_kfd_ctx, &sys_props);
if (result != HSAKMT_STATUS_SUCCESS)
goto topology_sysfs_failed;
@@ -227,6 +230,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
pr_warn("Insufficient Memory. Debugging unavailable\n");
hsakmt_init_counter_props(sys_props.NumNodes);
*pCtx = &hsakmt_primary_kfd_ctx;
if (!atfork_installed) {
/* Atfork handlers cannot be uninstalled and
@@ -241,6 +245,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
}
} else {
hsakmt_kfd_open_count++;
*pCtx = &hsakmt_primary_kfd_ctx;
result = HSAKMT_STATUS_KERNEL_ALREADY_OPENED;
}
@@ -256,7 +261,7 @@ open_failed:
return result;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void)
HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFDCtx(void)
{
HSAKMT_STATUS result;
@@ -266,7 +271,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void)
if (--hsakmt_kfd_open_count == 0) {
hsakmt_destroy_counter_props();
hsakmt_destroy_device_debugging_memory();
hsakmt_fmm_clear_all_aperture();
hsakmt_fmm_clear_all_aperture(&hsakmt_primary_kfd_ctx);
}
result = HSAKMT_STATUS_SUCCESS;
@@ -277,3 +282,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void)
return result;
}
/*
 * Legacy entry point: opens KFD through hsaKmtOpenKFDCtx() and returns its
 * status. The context pointer that call writes back is not used here.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
{
	HsaKFDContext *unused_ctx = NULL;
	HSAKMT_STATUS status;

	status = hsaKmtOpenKFDCtx(&unused_ctx);
	return status;
}
/* Legacy entry point: returns the status of hsaKmtCloseKFDCtx(). */
HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void)
{
	HSAKMT_STATUS status = hsaKmtCloseKFDCtx();

	return status;
}
@@ -65,7 +65,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingQueryCapabilities(HSAuint32 NodeId, void
args.num_sample_info = sample_info_sz;
args.flags = 0;
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
int err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_PC_SAMPLE, &args);
*size = args.num_sample_info;
@@ -111,7 +111,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingCreate(HSAuint32 NodeId, HsaPcSamplingIn
args.num_sample_info = 1;
args.trace_id = INVALID_TRACE_ID;
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
int err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_PC_SAMPLE, &args);
if (err) {
switch (errno) {
case EINVAL:
@@ -151,7 +151,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingDestroy(HSAuint32 NodeId, HsaPcSamplingT
args.gpu_id = gpu_id;
args.trace_id = traceId;
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
int err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_PC_SAMPLE, &args);
if (err) {
if (errno == EINVAL)
return HSAKMT_STATUS_INVALID_PARAMETER;
@@ -181,7 +181,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStart(HSAuint32 NodeId, HsaPcSamplingTra
args.gpu_id = gpu_id;
args.trace_id = traceId;
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
int err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_PC_SAMPLE, &args);
if (err) {
switch (errno) {
case EINVAL:
@@ -220,7 +220,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStop(HSAuint32 NodeId, HsaPcSamplingTrac
args.gpu_id = gpu_id;
args.trace_id = traceId;
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
int err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_PC_SAMPLE, &args);
if (err) {
switch (errno) {
case EINVAL:
+232 -108
View File
@@ -35,6 +35,7 @@
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
/* 1024 doorbells, 4 or 8 bytes each doorbell depending on ASIC generation */
#define DOORBELL_SIZE(gfxv) (((gfxv) >= 0x90000) ? 8 : 4)
@@ -80,8 +81,28 @@ struct process_doorbells {
pthread_mutex_t mutex;
};
static unsigned int num_doorbells;
static struct process_doorbells *doorbells;
/* Per-context queue state, reached through
 * hsakmt_kfdcontext_get_queue_context().
 */
struct hsa_kfd_queue_context {
	/* Number of entries in doorbells[]; set by
	 * hsakmt_init_process_doorbells() to its NumNodes argument.
	 */
	unsigned int num_doorbells;
	/* Doorbell array, indexed by topology NodeId. */
	struct process_doorbells *doorbells;
};
/*
 * Return the queue state attached to @ctx, allocating a zero-initialised
 * instance on first use and caching it in ctx->queue_context.
 *
 * Returns NULL when @ctx is NULL or when the allocation fails; callers must
 * check the result before dereferencing it.
 *
 * NOTE(review): the allocate-on-first-use path takes no lock here; confirm
 * that callers serialise the first access for a given context.
 */
struct hsa_kfd_queue_context *hsakmt_kfdcontext_get_queue_context(HsaKFDContext *ctx)
{
	struct hsa_kfd_queue_context *queue_ctx;

	/* assert() is compiled out under NDEBUG, so also guard explicitly
	 * to avoid dereferencing a NULL context in release builds.
	 */
	assert(ctx);
	if (!ctx)
		return NULL;

	if (ctx->queue_context)
		return ctx->queue_context;

	queue_ctx = calloc(1, sizeof(*queue_ctx));
	if (!queue_ctx) {
		pr_err("Alloc memory failed for struct hsa_kfd_queue_context size %zu\n",
			sizeof(struct hsa_kfd_queue_context));
		return NULL;
	}

	ctx->queue_context = queue_ctx;
	return queue_ctx;
}
uint32_t hsakmt_get_vgpr_size_per_cu(uint32_t gfxv)
{
@@ -102,26 +123,27 @@ uint32_t hsakmt_get_vgpr_size_per_cu(uint32_t gfxv)
return vgpr_size;
}
/*
 * Allocate and initialise the doorbell array of @ctx with NumNodes entries.
 *
 * Each entry starts with use_gpuvm = false, size = 0, mapping = NULL and a
 * freshly initialised mutex. On success num_doorbells is set to NumNodes.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when either the queue state of @ctx or
 * the doorbell array cannot be allocated, HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS hsakmt_init_process_doorbells(HsaKFDContext *ctx, unsigned int NumNodes)
{
	unsigned int i;
	struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);

	/* The getter returns NULL when its own allocation fails. */
	if (!queue_ctx)
		return HSAKMT_STATUS_NO_MEMORY;

	/* queue_ctx->doorbells[] is accessed using Topology NodeId. This means doorbells[0],
	 * which corresponds to CPU only Node, might not be used
	 */
	/* calloc() checks the count * size multiplication for overflow. */
	queue_ctx->doorbells = calloc(NumNodes, sizeof(*queue_ctx->doorbells));
	if (!queue_ctx->doorbells)
		return HSAKMT_STATUS_NO_MEMORY;

	for (i = 0; i < NumNodes; i++) {
		queue_ctx->doorbells[i].use_gpuvm = false;
		queue_ctx->doorbells[i].size = 0;
		queue_ctx->doorbells[i].mapping = NULL;
		pthread_mutex_init(&queue_ctx->doorbells[i].mutex, NULL);
	}

	queue_ctx->num_doorbells = NumNodes;

	return HSAKMT_STATUS_SUCCESS;
}
@@ -144,94 +166,105 @@ static void get_doorbell_map_info(uint32_t node_id,
return;
}
/*
 * Tear down every doorbell mapping of @ctx and free the doorbell array.
 *
 * Entries with a zero size are skipped. Entries flagged use_gpuvm are
 * unmapped from the GPU and released through the FMM layer; the others are
 * munmap()ed. Afterwards doorbells is NULL and num_doorbells is 0.
 * Does nothing when the queue state is unavailable or holds no array.
 */
void hsakmt_destroy_process_doorbells(HsaKFDContext *ctx)
{
	unsigned int i;
	struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
	struct process_doorbells *doorbells;

	/* The getter returns NULL when its allocation fails; nothing to free. */
	if (!queue_ctx)
		return;

	doorbells = queue_ctx->doorbells;
	if (!doorbells)
		return;

	for (i = 0; i < queue_ctx->num_doorbells; i++) {
		if (!doorbells[i].size)
			continue;

		if (doorbells[i].use_gpuvm) {
			hsakmt_fmm_unmap_from_gpu(ctx, doorbells[i].mapping);
			hsakmt_fmm_release(ctx, doorbells[i].mapping);
		} else
			munmap(doorbells[i].mapping, doorbells[i].size);
	}

	free(doorbells);
	queue_ctx->doorbells = NULL;
	queue_ctx->num_doorbells = 0;
}
/* This is a special function that should be called only from the child process
 * after a fork(). This will clear doorbells duplicated from the parent.
 *
 * Only entries that are not flagged use_gpuvm are munmap()ed here; the array
 * itself is freed and the bookkeeping reset. Does nothing when the queue
 * state of @ctx is unavailable or holds no array.
 */
void hsakmt_clear_process_doorbells(HsaKFDContext *ctx)
{
	unsigned int i;
	struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);

	/* The getter returns NULL when its allocation fails; nothing to clear. */
	if (!queue_ctx || !queue_ctx->doorbells)
		return;

	for (i = 0; i < queue_ctx->num_doorbells; i++) {
		if (!queue_ctx->doorbells[i].size)
			continue;

		if (!queue_ctx->doorbells[i].use_gpuvm)
			munmap(queue_ctx->doorbells[i].mapping, queue_ctx->doorbells[i].size);
	}

	free(queue_ctx->doorbells);
	queue_ctx->doorbells = NULL;
	queue_ctx->num_doorbells = 0;
}
/*
 * Map the doorbell region of NodeId by mmap()ing the context's KFD fd at
 * doorbell_mmap_offset, using the size already stored in the node's
 * doorbell entry. On success the mapping is recorded in that entry.
 *
 * Returns HSAKMT_STATUS_ERROR when mmap() fails. gpu_id is not used here.
 */
static HSAKMT_STATUS map_doorbell_apu(HsaKFDContext *ctx,
				      HSAuint32 NodeId, HSAuint32 gpu_id,
				      HSAuint64 doorbell_mmap_offset)
{
	struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
	struct process_doorbells *node_db = &queue_ctx->doorbells[NodeId];
	void *mapped;

	mapped = mmap(NULL, node_db->size, PROT_READ | PROT_WRITE,
		      MAP_SHARED, ctx->fd, doorbell_mmap_offset);
	if (mapped == MAP_FAILED)
		return HSAKMT_STATUS_ERROR;

	node_db->mapping = mapped;

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Map the doorbell region of NodeId through the FMM layer: allocate a
 * doorbell buffer for gpu_id, then map it for GPU access. If the GPU
 * mapping fails the buffer is released again. On success the pointer is
 * recorded in the node's doorbell entry.
 *
 * Returns HSAKMT_STATUS_ERROR when either step fails.
 */
static HSAKMT_STATUS map_doorbell_dgpu(HsaKFDContext *ctx,
				       HSAuint32 NodeId, HSAuint32 gpu_id,
				       HSAuint64 doorbell_mmap_offset)
{
	struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
	struct process_doorbells *node_db = &queue_ctx->doorbells[NodeId];
	void *doorbell_mem;

	doorbell_mem = hsakmt_fmm_allocate_doorbell(ctx, gpu_id, node_db->size,
						    doorbell_mmap_offset);
	if (!doorbell_mem)
		return HSAKMT_STATUS_ERROR;

	/* map for GPU access */
	if (hsakmt_fmm_map_to_gpu(ctx, doorbell_mem, node_db->size, NULL)) {
		hsakmt_fmm_release(ctx, doorbell_mem);
		return HSAKMT_STATUS_ERROR;
	}

	node_db->mapping = doorbell_mem;

	return HSAKMT_STATUS_SUCCESS;
}
static HSAKMT_STATUS map_doorbell(HSAuint32 NodeId, HSAuint32 gpu_id,
static HSAKMT_STATUS map_doorbell(HsaKFDContext *ctx,
HSAuint32 NodeId, HSAuint32 gpu_id,
HSAuint64 doorbell_mmap_offset)
{
HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;
struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
struct process_doorbells *doorbells = queue_ctx->doorbells;
pthread_mutex_lock(&doorbells[NodeId].mutex);
if (doorbells[NodeId].size) {
@@ -242,16 +275,16 @@ static HSAKMT_STATUS map_doorbell(HSAuint32 NodeId, HSAuint32 gpu_id,
get_doorbell_map_info(NodeId, &doorbells[NodeId]);
if (doorbells[NodeId].use_gpuvm) {
status = map_doorbell_dgpu(NodeId, gpu_id, doorbell_mmap_offset);
status = map_doorbell_dgpu(ctx, NodeId, gpu_id, doorbell_mmap_offset);
if (status != HSAKMT_STATUS_SUCCESS) {
/* Fall back to the old method if KFD doesn't
* support doorbells in GPUVM
*/
doorbells[NodeId].use_gpuvm = false;
status = map_doorbell_apu(NodeId, gpu_id, doorbell_mmap_offset);
status = map_doorbell_apu(ctx, NodeId, gpu_id, doorbell_mmap_offset);
}
} else
status = map_doorbell_apu(NodeId, gpu_id, doorbell_mmap_offset);
status = map_doorbell_apu(ctx, NodeId, gpu_id, doorbell_mmap_offset);
if (status != HSAKMT_STATUS_SUCCESS)
doorbells[NodeId].size = 0;
@@ -279,13 +312,13 @@ static void *allocate_exec_aligned_memory_cpu(uint32_t size)
}
/* The bool return value indicates whether the queue needs a context-save-restore area */
static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
static bool update_ctx_save_restore_size(HsaKFDContext *ctx, uint32_t nodeid, struct queue *q)
{
HsaNodeProperties node;
if (q->gfxv < GFX_VERSION_CARRIZO)
return false;
if (hsaKmtGetNodeProperties(nodeid, &node))
if (hsaKmtGetNodePropertiesCtx(ctx, nodeid, &node))
return false;
if (node.NumFComputeCores && node.NumSIMDPerCU) {
uint32_t ctl_stack_size, wg_data_size;
@@ -316,7 +349,8 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
return false;
}
void *hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uint32_t gpu_id,
void *hsakmt_allocate_exec_aligned_memory_gpu(HsaKFDContext *ctx,
uint32_t size, uint32_t align, uint32_t gpu_id,
uint32_t NodeId, bool nonPaged,
bool DeviceLocal,
bool Uncached)
@@ -337,7 +371,7 @@ void *hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uin
size = ALIGN_UP(size, align);
if (DeviceLocal && !hsakmt_zfb_support)
mem = hsakmt_fmm_allocate_device(gpu_id, NodeId, mem, size, 0, flags);
mem = hsakmt_fmm_allocate_device(ctx, gpu_id, NodeId, mem, size, 0, flags);
else {
/* VRAM under ZFB mode should be supported here without any
* additional code
@@ -352,7 +386,7 @@ void *hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uin
cpu_id = 0;
}
}
mem = hsakmt_fmm_allocate_host(gpu_id, cpu_id, mem, size, 0, flags);
mem = hsakmt_fmm_allocate_host(ctx, gpu_id, cpu_id, mem, size, 0, flags);
}
if (!mem) {
@@ -366,35 +400,36 @@ void *hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uin
HsaMemMapFlags map_flags = {0};
HSAKMT_STATUS result;
result = hsaKmtMapMemoryToGPUNodes(mem, size, &gpu_va, map_flags, 1, nodes_array);
result = hsaKmtMapMemoryToGPUNodesCtx(ctx, mem, size, &gpu_va, map_flags, 1, nodes_array);
if (result != HSAKMT_STATUS_SUCCESS) {
hsaKmtFreeMemory(mem, size);
hsaKmtFreeMemoryCtx(ctx, mem, size);
return NULL;
}
return mem;
}
if (hsaKmtMapMemoryToGPU(mem, size, &gpu_va) != HSAKMT_STATUS_SUCCESS) {
hsaKmtFreeMemory(mem, size);
if (hsaKmtMapMemoryToGPUCtx(ctx, mem, size, &gpu_va) != HSAKMT_STATUS_SUCCESS) {
hsaKmtFreeMemoryCtx(ctx, mem, size);
return NULL;
}
return mem;
}
/*
 * Unmap addr from the GPU on @ctx and, only if that succeeds, free it with
 * the size rounded up to align via ALIGN_UP().
 */
void hsakmt_free_exec_aligned_memory_gpu(HsaKFDContext *ctx, void *addr, uint32_t size, uint32_t align)
{
	const uint32_t aligned_size = ALIGN_UP(size, align);

	/* Leave the memory alone when the unmap did not succeed. */
	if (hsaKmtUnmapMemoryToGPUCtx(ctx, addr) != HSAKMT_STATUS_SUCCESS)
		return;

	hsaKmtFreeMemoryCtx(ctx, addr, aligned_size);
}
/*
* Allocates memory aligned to sysconf(_SC_PAGESIZE)
*/
static void *allocate_exec_aligned_memory(uint32_t size,
static void *allocate_exec_aligned_memory(HsaKFDContext *ctx,
uint32_t size,
bool use_ats,
uint32_t gpu_id,
uint32_t NodeId,
@@ -403,17 +438,19 @@ static void *allocate_exec_aligned_memory(uint32_t size,
bool Uncached)
{
if (!use_ats)
return hsakmt_allocate_exec_aligned_memory_gpu(size, PAGE_SIZE, gpu_id, NodeId,
return hsakmt_allocate_exec_aligned_memory_gpu(ctx,
size, PAGE_SIZE, gpu_id, NodeId,
nonPaged, DeviceLocal,
Uncached);
return allocate_exec_aligned_memory_cpu(size);
}
/*
 * Release memory obtained from allocate_exec_aligned_memory(): with use_ats
 * the range is munmap()ed using the unrounded size; otherwise it is freed
 * through hsakmt_free_exec_aligned_memory_gpu() on @ctx.
 */
static void free_exec_aligned_memory(HsaKFDContext *ctx,
				     void *addr, uint32_t size, uint32_t align,
				     bool use_ats)
{
	if (use_ats) {
		munmap(addr, size);
		return;
	}

	hsakmt_free_exec_aligned_memory_gpu(ctx, addr, size, align);
}
@@ -454,20 +491,20 @@ static HSAKMT_STATUS register_svm_range(void *mem, uint32_t size,
return hsaKmtSVMSetAttr(mem, size, nattr, attrs);
}
/*
 * Release every buffer owned by q, then q itself.
 *
 * The EOP buffer is freed when present. The context save/restore area is
 * munmap()ed when q->unified_ctx_save_restore is set, otherwise freed via
 * free_exec_aligned_memory() when present. The queue structure is released
 * last, since its fields are read for the earlier steps.
 */
static void free_queue(HsaKFDContext *ctx, struct queue *q)
{
	if (q->eop_buffer)
		free_exec_aligned_memory(ctx, q->eop_buffer,
					 q->eop_buffer_size,
					 PAGE_SIZE, q->use_ats);

	if (q->unified_ctx_save_restore) {
		munmap(q->ctx_save_restore, q->total_mem_alloc_size);
	} else {
		if (q->ctx_save_restore)
			free_exec_aligned_memory(ctx, q->ctx_save_restore,
						 q->total_mem_alloc_size,
						 PAGE_SIZE, q->use_ats);
	}

	free_exec_aligned_memory(ctx, (void *)q, sizeof(*q), PAGE_SIZE, q->use_ats);
}
static inline void fill_cwsr_header(struct queue *q, void *addr,
@@ -488,7 +525,8 @@ static inline void fill_cwsr_header(struct queue *q, void *addr,
}
}
static int handle_concrete_asic(struct queue *q,
static int handle_concrete_asic(HsaKFDContext *ctx,
struct queue *q,
struct kfd_ioctl_create_queue_args *args,
uint32_t gpu_id,
uint32_t NodeId,
@@ -503,7 +541,8 @@ static int handle_concrete_asic(struct queue *q,
if (q->eop_buffer_size > 0) {
pr_info("Allocating VRAM for EOP\n");
q->eop_buffer = allocate_exec_aligned_memory(q->eop_buffer_size,
q->eop_buffer = allocate_exec_aligned_memory(ctx,
q->eop_buffer_size,
q->use_ats, gpu_id,
NodeId, true, true, /* Unused for VRAM */false);
if (!q->eop_buffer)
@@ -513,12 +552,12 @@ static int handle_concrete_asic(struct queue *q,
args->eop_buffer_size = q->eop_buffer_size;
}
ret = update_ctx_save_restore_size(NodeId, q);
ret = update_ctx_save_restore_size(ctx, NodeId, q);
if (ret) {
HsaNodeProperties node;
if (hsaKmtGetNodeProperties(NodeId, &node))
if (hsaKmtGetNodePropertiesCtx(ctx, NodeId, &node))
return HSAKMT_STATUS_ERROR;
args->ctx_save_restore_size = q->ctx_save_restore_size;
@@ -568,7 +607,7 @@ static int handle_concrete_asic(struct queue *q,
}
if (!q->unified_ctx_save_restore) {
q->ctx_save_restore = allocate_exec_aligned_memory(
q->ctx_save_restore = allocate_exec_aligned_memory(ctx,
q->total_mem_alloc_size,
q->use_ats, gpu_id, NodeId,
false, false, false);
@@ -591,24 +630,26 @@ static int handle_concrete_asic(struct queue *q,
*/
static uint32_t priority_map[] = {0, 3, 5, 7, 9, 11, 15};
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId,
HSA_QUEUE_TYPE Type,
HSAuint32 QueuePercentage,
HSA_QUEUE_PRIORITY Priority,
void *QueueAddress,
HSAuint64 QueueSizeInBytes,
HsaEvent *Event,
HsaQueueResource *QueueResource)
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueCtx(HsaKFDContext *ctx,
HSAuint32 NodeId,
HSA_QUEUE_TYPE Type,
HSAuint32 QueuePercentage,
HSA_QUEUE_PRIORITY Priority,
void *QueueAddress,
HSAuint64 QueueSizeInBytes,
HsaEvent *Event,
HsaQueueResource *QueueResource)
{
if (Type == HSA_QUEUE_SDMA_BY_ENG_ID)
return HSAKMT_STATUS_ERROR;
return hsaKmtCreateQueueExt(NodeId, Type, QueuePercentage, Priority, 0,
return hsaKmtCreateQueueExtCtx(ctx, NodeId, Type, QueuePercentage, Priority, 0,
QueueAddress, QueueSizeInBytes, Event,
QueueResource);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExtCtx(HsaKFDContext *ctx,
HSAuint32 NodeId,
HSA_QUEUE_TYPE Type,
HSAuint32 QueuePercentage,
HSA_QUEUE_PRIORITY Priority,
@@ -628,6 +669,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
CHECK_KFD_OPEN();
struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
if (Priority < HSA_QUEUE_PRIORITY_MINIMUM ||
Priority > HSA_QUEUE_PRIORITY_MAXIMUM)
return HSAKMT_STATUS_INVALID_PARAMETER;
@@ -636,7 +679,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
if (result != HSAKMT_STATUS_SUCCESS)
return result;
struct queue *q = allocate_exec_aligned_memory(sizeof(*q),
struct queue *q = allocate_exec_aligned_memory(ctx, sizeof(*q),
false, gpu_id, NodeId, true, false, true);
if (!q)
return HSAKMT_STATUS_NO_MEMORY;
@@ -656,7 +699,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
/* By default, CUs are all turned on. Initialize cu_mask to '1
* for all CU bits.
*/
if (hsaKmtGetNodeProperties(NodeId, &props))
if (hsaKmtGetNodePropertiesCtx(ctx, NodeId, &props))
q->cu_mask_count = 0;
else {
cu_num = props.NumFComputeCores / props.NumSIMDPerCU;
@@ -695,9 +738,9 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
QueueResource->QueueWptrValue = (uintptr_t)&q->wptr;
}
err = handle_concrete_asic(q, &args, gpu_id, NodeId, Event, QueueResource->ErrorReason);
err = handle_concrete_asic(ctx, q, &args, gpu_id, NodeId, Event, QueueResource->ErrorReason);
if (err != HSAKMT_STATUS_SUCCESS) {
free_queue(q);
free_queue(ctx, q);
return err;
}
@@ -709,10 +752,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
args.queue_priority = priority_map[Priority+3];
args.sdma_engine_id = SdmaEngineId;
err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);
err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_CREATE_QUEUE, &args);
if (err == -1) {
free_queue(q);
free_queue(ctx, q);
return HSAKMT_STATUS_ERROR;
}
@@ -737,20 +780,21 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
doorbell_offset = q->queue_id * DOORBELL_SIZE(q->gfxv);
}
err = map_doorbell(NodeId, gpu_id, doorbell_mmap_offset);
err = map_doorbell(ctx, NodeId, gpu_id, doorbell_mmap_offset);
if (err != HSAKMT_STATUS_SUCCESS) {
hsaKmtDestroyQueue(q->queue_id);
hsaKmtDestroyQueueCtx(ctx, q->queue_id);
return HSAKMT_STATUS_ERROR;
}
QueueResource->QueueId = PORT_VPTR_TO_UINT64(q);
QueueResource->Queue_DoorBell = VOID_PTR_ADD(doorbells[NodeId].mapping,
QueueResource->Queue_DoorBell = VOID_PTR_ADD(queue_ctx->doorbells[NodeId].mapping,
doorbell_offset);
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueueCtx(HsaKFDContext *ctx,
HSA_QUEUEID QueueId,
HSAuint32 QueuePercentage,
HSA_QUEUE_PRIORITY Priority,
void *QueueAddress,
@@ -774,7 +818,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
arg.queue_percentage = QueuePercentage;
arg.queue_priority = priority_map[Priority+3];
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_UPDATE_QUEUE, &arg);
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_UPDATE_QUEUE, &arg);
if (err == -1)
return HSAKMT_STATUS_ERROR;
@@ -782,7 +826,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId)
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueueCtx(HsaKFDContext *ctx,
HSA_QUEUEID QueueId)
{
CHECK_KFD_OPEN();
@@ -794,20 +839,21 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId)
args.queue_id = q->queue_id;
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DESTROY_QUEUE, &args);
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DESTROY_QUEUE, &args);
if (err == -1) {
pr_err("Failed to destroy queue: %s\n", strerror(errno));
return HSAKMT_STATUS_ERROR;
}
free_queue(q);
free_queue(ctx, q);
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
HSAuint32 CUMaskCount,
HSAuint32 *QueueCUMask)
HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMaskCtx(HsaKFDContext *ctx,
HSA_QUEUEID QueueId,
HSAuint32 CUMaskCount,
HSAuint32 *QueueCUMask)
{
struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
struct kfd_ioctl_set_cu_mask_args args = {0};
@@ -821,7 +867,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
args.num_cu_mask = CUMaskCount;
args.cu_mask_ptr = (uintptr_t)QueueCUMask;
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_CU_MASK, &args);
if (err == -1)
return HSAKMT_STATUS_ERROR;
@@ -832,12 +878,9 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetQueueInfo(
HSA_QUEUEID QueueId,
HsaQueueInfo *QueueInfo
)
HSAKMT_STATUS HSAKMTAPI hsaKmtGetQueueInfoCtx(HsaKFDContext *ctx,
HSA_QUEUEID QueueId,
HsaQueueInfo *QueueInfo)
{
struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
struct kfd_ioctl_get_queue_wave_state_args args = {0};
@@ -853,7 +896,7 @@ hsaKmtGetQueueInfo(
args.queue_id = q->queue_id;
args.ctl_stack_address = (uintptr_t)q->ctx_save_restore;
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_QUEUE_WAVE_STATE, &args) < 0)
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_GET_QUEUE_WAVE_STATE, &args) < 0)
return HSAKMT_STATUS_ERROR;
QueueInfo->ControlStackTop = (void *)(args.ctl_stack_address +
@@ -871,7 +914,8 @@ hsaKmtGetQueueInfo(
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandler(HSAuint32 Node,
HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandlerCtx(HsaKFDContext *ctx,
HSAuint32 Node,
void *TrapHandlerBaseAddress,
HSAuint64 TrapHandlerSizeInBytes,
void *TrapBufferBaseAddress,
@@ -891,7 +935,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandler(HSAuint32 Node,
args.tba_addr = (uintptr_t)TrapHandlerBaseAddress;
args.tma_addr = (uintptr_t)TrapBufferBaseAddress;
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_TRAP_HANDLER, &args);
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_TRAP_HANDLER, &args);
return (err == -1) ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}
@@ -921,12 +965,10 @@ uint32_t *hsakmt_convert_queue_ids(HSAuint32 NumQueues, HSA_QUEUEID *Queues)
return queue_ids_ptr;
}
HSAKMT_STATUS
HSAKMTAPI
hsaKmtAllocQueueGWS(
HSA_QUEUEID QueueId,
HSAuint32 nGWS,
HSAuint32 *firstGWS)
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocQueueGWSCtx(HsaKFDContext *ctx,
HSA_QUEUEID QueueId,
HSAuint32 nGWS,
HSAuint32 *firstGWS)
{
struct kfd_ioctl_alloc_queue_gws_args args = {0};
struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
@@ -936,7 +978,7 @@ hsaKmtAllocQueueGWS(
args.queue_id = (HSAuint32)q->queue_id;
args.num_gws = nGWS;
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args);
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args);
if (!err && firstGWS)
*firstGWS = args.first_gws;
@@ -952,3 +994,85 @@ hsaKmtAllocQueueGWS(
else
return HSAKMT_STATUS_ERROR;
}
/*
 * Legacy queue-creation entry point that does not take an SDMA engine id.
 *
 * Requests for HSA_QUEUE_SDMA_BY_ENG_ID are refused with HSAKMT_STATUS_ERROR.
 * Every other queue type is handed to hsaKmtCreateQueueExt() with an SDMA
 * engine id of 0, and that call's status is returned unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId,
					  HSA_QUEUE_TYPE Type,
					  HSAuint32 QueuePercentage,
					  HSA_QUEUE_PRIORITY Priority,
					  void *QueueAddress,
					  HSAuint64 QueueSizeInBytes,
					  HsaEvent *Event,
					  HsaQueueResource *QueueResource)
{
	const HSAuint32 sdma_engine_id = 0;

	if (Type != HSA_QUEUE_SDMA_BY_ENG_ID)
		return hsaKmtCreateQueueExt(NodeId, Type, QueuePercentage,
					    Priority, sdma_engine_id,
					    QueueAddress, QueueSizeInBytes,
					    Event, QueueResource);

	return HSAKMT_STATUS_ERROR;
}
/*
 * Legacy wrapper: creates a queue on the primary KFD context
 * (hsakmt_primary_kfd_ctx) by forwarding every argument, unchanged, to
 * hsaKmtCreateQueueExtCtx(). The callee's status is returned as-is.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
					     HSA_QUEUE_TYPE Type,
					     HSAuint32 QueuePercentage,
					     HSA_QUEUE_PRIORITY Priority,
					     HSAuint32 SdmaEngineId,
					     void *QueueAddress,
					     HSAuint64 QueueSizeInBytes,
					     HsaEvent *Event,
					     HsaQueueResource *QueueResource)
{
	HSAKMT_STATUS status;

	status = hsaKmtCreateQueueExtCtx(&hsakmt_primary_kfd_ctx,
					 NodeId, Type, QueuePercentage,
					 Priority, SdmaEngineId,
					 QueueAddress, QueueSizeInBytes,
					 Event, QueueResource);
	return status;
}
/*
 * Legacy wrapper: updates a queue through the primary KFD context
 * (hsakmt_primary_kfd_ctx) by delegating to hsaKmtUpdateQueueCtx() with the
 * caller's arguments untouched.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
					  HSAuint32 QueuePercentage,
					  HSA_QUEUE_PRIORITY Priority,
					  void *QueueAddress,
					  HSAuint64 QueueSize,
					  HsaEvent *Event)
{
	HSAKMT_STATUS status;

	status = hsaKmtUpdateQueueCtx(&hsakmt_primary_kfd_ctx, QueueId,
				      QueuePercentage, Priority,
				      QueueAddress, QueueSize, Event);
	return status;
}
/*
 * Legacy wrapper: destroys the given queue via hsaKmtDestroyQueueCtx() using
 * the primary KFD context (hsakmt_primary_kfd_ctx).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId)
{
	HSAKMT_STATUS status;

	status = hsaKmtDestroyQueueCtx(&hsakmt_primary_kfd_ctx, QueueId);
	return status;
}
/*
 * Legacy wrapper: applies a CU mask to a queue by calling
 * hsaKmtSetQueueCUMaskCtx() on the primary KFD context
 * (hsakmt_primary_kfd_ctx). Arguments are passed through unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
					     HSAuint32 CUMaskCount,
					     HSAuint32 *QueueCUMask)
{
	HSAKMT_STATUS status;

	status = hsaKmtSetQueueCUMaskCtx(&hsakmt_primary_kfd_ctx, QueueId,
					 CUMaskCount, QueueCUMask);
	return status;
}
/*
 * Legacy wrapper: fills *QueueInfo for the given queue by delegating to
 * hsaKmtGetQueueInfoCtx() on the primary KFD context
 * (hsakmt_primary_kfd_ctx).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetQueueInfo(HSA_QUEUEID QueueId,
					   HsaQueueInfo *QueueInfo)
{
	HSAKMT_STATUS status;

	status = hsaKmtGetQueueInfoCtx(&hsakmt_primary_kfd_ctx, QueueId,
				       QueueInfo);
	return status;
}
/*
 * Legacy wrapper: installs a trap handler for Node by forwarding to
 * hsaKmtSetTrapHandlerCtx() with the primary KFD context
 * (hsakmt_primary_kfd_ctx). All address/size arguments are passed through
 * unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandler(HSAuint32 Node,
					     void *TrapHandlerBaseAddress,
					     HSAuint64 TrapHandlerSizeInBytes,
					     void *TrapBufferBaseAddress,
					     HSAuint64 TrapBufferSizeInBytes)
{
	HSAKMT_STATUS status;

	status = hsaKmtSetTrapHandlerCtx(&hsakmt_primary_kfd_ctx, Node,
					 TrapHandlerBaseAddress,
					 TrapHandlerSizeInBytes,
					 TrapBufferBaseAddress,
					 TrapBufferSizeInBytes);
	return status;
}
/*
 * Legacy wrapper: requests nGWS GWS entries for a queue by calling
 * hsaKmtAllocQueueGWSCtx() on the primary KFD context
 * (hsakmt_primary_kfd_ctx). firstGWS is handed to the callee unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocQueueGWS(HSA_QUEUEID QueueId,
					    HSAuint32 nGWS,
					    HSAuint32 *firstGWS)
{
	HSAKMT_STATUS status;

	status = hsaKmtAllocQueueGWSCtx(&hsakmt_primary_kfd_ctx, QueueId,
					nGWS, firstGWS);
	return status;
}
+3 -3
View File
@@ -45,7 +45,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMAcquire(HSAuint32 PreferredNode)
args.op = KFD_IOCTL_SPM_OP_ACQUIRE;
args.gpu_id = gpu_id;
ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RLC_SPM, &args);
ret = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RLC_SPM, &args);
return ret;
}
@@ -72,7 +72,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMSetDestBuffer(HSAuint32 PreferredNode,
args.op = KFD_IOCTL_SPM_OP_SET_DEST_BUF;
args.gpu_id = gpu_id;
ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RLC_SPM, &args);
ret = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RLC_SPM, &args);
*SizeCopied = args.bytes_copied;
*isSPMDataLoss = args.has_data_loss;
@@ -96,7 +96,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMRelease(HSAuint32 PreferredNode)
args.op = KFD_IOCTL_SPM_OP_RELEASE;
args.gpu_id = gpu_id;
ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RLC_SPM, &args);
ret = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RLC_SPM, &args);
return ret;
}
+42 -6
View File
@@ -37,7 +37,8 @@
/* Helper functions for calling KFD SVM ioctl */
HSAKMT_STATUS HSAKMTAPI
hsaKmtSVMSetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
hsaKmtSVMSetAttrCtx(HsaKFDContext *ctx,
void *start_addr, HSAuint64 size, unsigned int nattr,
HSA_SVM_ATTRIBUTE *attrs)
{
struct kfd_ioctl_svm_args *args;
@@ -94,7 +95,7 @@ hsaKmtSVMSetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
}
/* Driver does one copy_from_user, with extra attrs size */
r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
r = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
if (r) {
pr_debug("op set range attrs failed %s\n", strerror(errno));
return HSAKMT_STATUS_ERROR;
@@ -104,7 +105,8 @@ hsaKmtSVMSetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
}
HSAKMT_STATUS HSAKMTAPI
hsaKmtSVMGetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
hsaKmtSVMGetAttrCtx(HsaKFDContext *ctx,
void *start_addr, HSAuint64 size, unsigned int nattr,
HSA_SVM_ATTRIBUTE *attrs)
{
struct kfd_ioctl_svm_args *args;
@@ -150,7 +152,7 @@ hsaKmtSVMGetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
}
/* Driver does one copy_from_user, with extra attrs size */
r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
r = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
if (r) {
pr_debug("op get range attrs failed %s\n", strerror(errno));
return HSAKMT_STATUS_ERROR;
@@ -187,7 +189,7 @@ hsaKmtSVMGetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
}
static HSAKMT_STATUS
hsaKmtSetGetXNACKMode(HSAint32 * enable)
hsaKmtSetGetXNACKModeCtx(HsaKFDContext *ctx, HSAint32 * enable)
{
struct kfd_ioctl_set_xnack_mode_args args;
@@ -196,7 +198,7 @@ hsaKmtSetGetXNACKMode(HSAint32 * enable)
args.xnack_enabled = *enable;
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_XNACK_MODE, &args)) {
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_XNACK_MODE, &args)) {
if (errno == EPERM) {
pr_debug("set mode not supported %s\n",
strerror(errno));
@@ -213,6 +215,40 @@ hsaKmtSetGetXNACKMode(HSAint32 * enable)
return HSAKMT_STATUS_SUCCESS;
}
/*
 * Set the XNACK mode on the given KFD context.
 *
 * The requested value is passed by address to hsaKmtSetGetXNACKModeCtx(),
 * whose status is returned. Anything the helper writes back is discarded,
 * because only a local copy of the by-value argument is exposed to it.
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtSetXNACKModeCtx(HsaKFDContext *ctx, HSAint32 enable)
{
	HSAint32 requested_mode = enable;

	return hsaKmtSetGetXNACKModeCtx(ctx, &requested_mode);
}
/*
 * Query the XNACK mode of the given KFD context.
 *
 * @ctx:    KFD context to query.
 * @enable: out-parameter; must not be NULL. It is seeded with -1 and then
 *          passed to hsaKmtSetGetXNACKModeCtx(), which stores the value in
 *          the ioctl argument block.
 *          NOTE(review): -1 presumably asks the driver to report the mode
 *          rather than change it -- confirm against kfd_ioctl.h.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when @enable is NULL (previously a
 * NULL argument was dereferenced); otherwise the helper's status.
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtGetXNACKModeCtx(HsaKFDContext *ctx, HSAint32 * enable)
{
	/* Reject a missing out-parameter before writing through it. */
	if (!enable)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	*enable = -1;
	return hsaKmtSetGetXNACKModeCtx(ctx, enable);
}
/*
 * Legacy wrapper: sets SVM attributes on the range [start_addr, +size) by
 * delegating to hsaKmtSVMSetAttrCtx() with the primary KFD context
 * (hsakmt_primary_kfd_ctx).
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtSVMSetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
		 HSA_SVM_ATTRIBUTE *attrs)
{
	HSAKMT_STATUS status;

	status = hsaKmtSVMSetAttrCtx(&hsakmt_primary_kfd_ctx,
				     start_addr, size, nattr, attrs);
	return status;
}
/*
 * Legacy wrapper: reads SVM attributes for the range [start_addr, +size) by
 * delegating to hsaKmtSVMGetAttrCtx() with the primary KFD context
 * (hsakmt_primary_kfd_ctx).
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtSVMGetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
		 HSA_SVM_ATTRIBUTE *attrs)
{
	HSAKMT_STATUS status;

	status = hsaKmtSVMGetAttrCtx(&hsakmt_primary_kfd_ctx,
				     start_addr, size, nattr, attrs);
	return status;
}
/*
 * File-local helper for the legacy XNACK entry points: runs
 * hsaKmtSetGetXNACKModeCtx() against the primary KFD context
 * (hsakmt_primary_kfd_ctx), passing the caller's pointer through unchanged.
 */
static HSAKMT_STATUS
hsaKmtSetGetXNACKMode(HSAint32 * enable)
{
	HSAKMT_STATUS status;

	status = hsaKmtSetGetXNACKModeCtx(&hsakmt_primary_kfd_ctx, enable);
	return status;
}
HSAKMT_STATUS HSAKMTAPI
hsaKmtSetXNACKMode(HSAint32 enable)
{
+1 -1
View File
@@ -42,7 +42,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
args.gpu_id = gpu_id;
err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
if (err < 0) {
result = HSAKMT_STATUS_ERROR;
} else {
+78 -29
View File
@@ -96,7 +96,7 @@ static const char *supported_processor_vendor_name[] = {
"\n" // POWER requires a different search method
};
static HSAKMT_STATUS topology_take_snapshot(void);
static HSAKMT_STATUS topology_take_snapshot(HsaKFDContext *ctx);
static void topology_drop_snapshot(void);
static const struct hsa_gfxip_table gfxip_lookup_table[] = {
@@ -645,7 +645,8 @@ static HSAKMT_STATUS topology_sysfs_get_gpu_id(uint32_t sysfs_node_id, uint32_t
* - if corresponding drm render node is not available.
* - if node information is not accessible (EPERM)
*/
static HSAKMT_STATUS topology_sysfs_check_node_supported(uint32_t sysfs_node_id, bool *is_node_supported)
static HSAKMT_STATUS topology_sysfs_check_node_supported(HsaKFDContext *ctx,
uint32_t sysfs_node_id, bool *is_node_supported)
{
uint32_t gpu_id;
FILE *fd;
@@ -711,7 +712,7 @@ static HSAKMT_STATUS topology_sysfs_check_node_supported(uint32_t sysfs_node_id,
}
/* Open DRM Render device */
ret_value = hsakmt_open_drm_render_device(drm_render_minor);
ret_value = hsakmt_open_drm_render_device(ctx, drm_render_minor);
if (ret_value > 0)
*is_node_supported = true;
else if (ret_value != -ENOENT && ret_value != -EPERM)
@@ -723,7 +724,8 @@ err:
return ret;
}
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaSystemProperties *props)
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx,
HsaSystemProperties *props)
{
FILE *fd;
char *read_buf, *p;
@@ -800,7 +802,7 @@ HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaSystemProperties *props)
}
for (uint32_t i = 0; i < num_sysfs_nodes; i++) {
ret = topology_sysfs_check_node_supported(i, &is_node_supported);
ret = topology_sysfs_check_node_supported(ctx, i, &is_node_supported);
if (ret != HSAKMT_STATUS_SUCCESS)
goto sysfs_parse_failed;
if (is_node_supported)
@@ -1631,7 +1633,8 @@ static HSAKMT_STATUS topology_map_sysfs_to_user_node_id(uint32_t sys_node_id, ui
* If node_to specified by the @iolink_id is not accessible the function returns HSAKMT_STATUS_NOT_SUPPORTED.
* If node_to is accessible, then node_to is mapped from sysfs_node to user_node and returns HSAKMT_STATUS_SUCCESS.
*/
static HSAKMT_STATUS topology_sysfs_get_iolink_props(uint32_t node_id,
static HSAKMT_STATUS topology_sysfs_get_iolink_props(HsaKFDContext *ctx,
uint32_t node_id,
uint32_t iolink_id,
HsaIoLinkProperties *props, bool p2pLink)
{
@@ -1693,7 +1696,7 @@ static HSAKMT_STATUS topology_sysfs_get_iolink_props(uint32_t node_id,
uint32_t sysfs_node_id;
sysfs_node_id = (uint32_t)prop_val;
ret = topology_sysfs_check_node_supported(sysfs_node_id, &is_node_supported);
ret = topology_sysfs_check_node_supported(ctx, sysfs_node_id, &is_node_supported);
if (!is_node_supported) {
ret = HSAKMT_STATUS_NOT_SUPPORTED;
memset(props, 0, sizeof(*props));
@@ -1955,7 +1958,7 @@ try_alt_dir:
}
}
HSAKMT_STATUS topology_take_snapshot(void)
HSAKMT_STATUS topology_take_snapshot(HsaKFDContext *ctx)
{
uint32_t gen_start, gen_end, i, mem_id, cache_id;
HsaSystemProperties sys_props;
@@ -1978,7 +1981,7 @@ retry:
ret = topology_sysfs_get_generation(&gen_start);
if (ret != HSAKMT_STATUS_SUCCESS)
goto err;
ret = hsakmt_topology_sysfs_get_system_props(&sys_props);
ret = hsakmt_topology_sysfs_get_system_props(ctx, &sys_props);
if (ret != HSAKMT_STATUS_SUCCESS)
goto err;
if (sys_props.NumNodes > 0) {
@@ -2059,7 +2062,7 @@ retry:
*/
while (sys_link_id < num_ioLinks &&
link_id < sys_props.NumNodes - 1) {
ret = topology_sysfs_get_iolink_props(i, sys_link_id++,
ret = topology_sysfs_get_iolink_props(ctx, i, sys_link_id++,
&temp_props[i].link[link_id], false);
if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
continue;
@@ -2080,7 +2083,7 @@ retry:
*/
while (sys_link_id < num_p2pLinks &&
link_id < sys_props.NumNodes - 1) {
ret = topology_sysfs_get_iolink_props(i, sys_link_id++,
ret = topology_sysfs_get_iolink_props(ctx, i, sys_link_id++,
&temp_props[i].link[link_id], true);
if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
continue;
@@ -2179,7 +2182,8 @@ HSAKMT_STATUS hsakmt_gpuid_to_nodeid(uint32_t gpu_id, uint32_t *node_id)
}
HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties)
HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemPropertiesCtx(HsaKFDContext *ctx,
HsaSystemProperties *SystemProperties)
{
HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS;
@@ -2198,7 +2202,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *Syste
goto out;
}
err = topology_take_snapshot();
err = topology_take_snapshot(ctx);
if (err != HSAKMT_STATUS_SUCCESS)
goto out;
@@ -2207,11 +2211,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *Syste
if (hsakmt_use_model)
model_init();
err = hsakmt_fmm_init_process_apertures(g_system->NumNodes);
err = hsakmt_fmm_init_process_apertures(ctx, g_system->NumNodes);
if (err != HSAKMT_STATUS_SUCCESS)
goto init_process_apertures_failed;
err = hsakmt_init_process_doorbells(g_system->NumNodes);
err = hsakmt_init_process_doorbells(ctx, g_system->NumNodes);
if (err != HSAKMT_STATUS_SUCCESS)
goto init_doorbells_failed;
@@ -2220,7 +2224,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *Syste
goto out;
init_doorbells_failed:
hsakmt_fmm_destroy_process_apertures();
hsakmt_fmm_destroy_process_apertures(ctx);
init_process_apertures_failed:
topology_drop_snapshot();
@@ -2229,12 +2233,12 @@ out:
return err;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void)
HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemPropertiesCtx(HsaKFDContext *ctx)
{
pthread_mutex_lock(&hsakmt_mutex);
hsakmt_destroy_process_doorbells();
hsakmt_fmm_destroy_process_apertures();
hsakmt_destroy_process_doorbells(ctx);
hsakmt_fmm_destroy_process_apertures(ctx);
topology_drop_snapshot();
pthread_mutex_unlock(&hsakmt_mutex);
@@ -2252,7 +2256,9 @@ HSAKMT_STATUS hsakmt_topology_get_node_props(HSAuint32 NodeId,
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodePropertiesCtx(HsaKFDContext *ctx,
HSAuint32 NodeId,
HsaNodeProperties *NodeProperties)
{
HSAKMT_STATUS err;
@@ -2278,7 +2284,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId,
NodeProperties->NumMemoryBanks += NUM_OF_DGPU_HEAPS;
else
NodeProperties->NumMemoryBanks += NUM_OF_IGPU_HEAPS;
if (hsakmt_fmm_get_aperture_base_and_limit(FMM_MMIO, gpu_id, &base,
if (hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_MMIO, gpu_id, &base,
&limit) == HSAKMT_STATUS_SUCCESS)
NodeProperties->NumMemoryBanks += 1;
}
@@ -2288,7 +2294,8 @@ out:
return err;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
HSAuint32 NodeId,
HSAuint32 NumBanks,
HsaMemoryProperties *MemoryProperties)
{
@@ -2319,7 +2326,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
/*Add LDS*/
if (i < NumBanks &&
hsakmt_fmm_get_aperture_base_and_limit(FMM_LDS, gpu_id,
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_LDS, gpu_id,
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_LDS;
MemoryProperties[i].SizeInBytes = g_props[NodeId].node.LDSSizeInKB * 1024;
@@ -2332,7 +2339,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
*/
if (hsakmt_get_gfxv_by_node_id(NodeId) == GFX_VERSION_KAVERI && i < NumBanks &&
g_props[NodeId].node.LocalMemSize > 0 &&
hsakmt_fmm_get_aperture_base_and_limit(FMM_GPUVM, gpu_id,
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_GPUVM, gpu_id,
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
MemoryProperties[i].SizeInBytes = g_props[NodeId].node.LocalMemSize;
@@ -2341,7 +2348,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
/* Add SCRATCH */
if (i < NumBanks &&
hsakmt_fmm_get_aperture_base_and_limit(FMM_SCRATCH, gpu_id,
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_SCRATCH, gpu_id,
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_SCRATCH;
MemoryProperties[i].SizeInBytes = (aperture_limit - MemoryProperties[i].VirtualBaseAddress) + 1;
@@ -2350,7 +2357,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
/* Add SVM aperture */
if (hsakmt_topology_is_svm_needed(g_props[NodeId].node.EngineId) && i < NumBanks &&
hsakmt_fmm_get_aperture_base_and_limit(
hsakmt_fmm_get_aperture_base_and_limit(ctx,
FMM_SVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress,
&aperture_limit) == HSAKMT_STATUS_SUCCESS) {
MemoryProperties[i].HeapType = HSA_HEAPTYPE_DEVICE_SVM;
@@ -2360,7 +2367,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
/* Add mmio aperture */
if (i < NumBanks &&
hsakmt_fmm_get_aperture_base_and_limit(FMM_MMIO, gpu_id,
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_MMIO, gpu_id,
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
MemoryProperties[i].HeapType = HSA_HEAPTYPE_MMIO_REMAP;
MemoryProperties[i].SizeInBytes = (aperture_limit - MemoryProperties[i].VirtualBaseAddress) + 1;
@@ -2372,7 +2379,8 @@ out:
return err;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCachePropertiesCtx(HsaKFDContext *ctx,
HSAuint32 NodeId,
HSAuint32 ProcessorId,
HSAuint32 NumCaches,
HsaCacheProperties *CacheProperties)
@@ -2422,7 +2430,8 @@ HSAKMT_STATUS hsakmt_topology_get_iolink_props(HSAuint32 NodeId,
return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId,
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkPropertiesCtx(HsaKFDContext *ctx,
HSAuint32 NodeId,
HSAuint32 NumIoLinks,
HsaIoLinkProperties *IoLinkProperties)
{
@@ -2536,3 +2545,43 @@ inline uint32_t hsakmt_get_num_sysfs_nodes(void)
{
return num_sysfs_nodes;
}
/*
 * Legacy wrapper: acquires system properties through the primary KFD context
 * (hsakmt_primary_kfd_ctx) by calling hsaKmtAcquireSystemPropertiesCtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties)
{
	HSAKMT_STATUS status;

	status = hsaKmtAcquireSystemPropertiesCtx(&hsakmt_primary_kfd_ctx,
						  SystemProperties);
	return status;
}
/*
 * Legacy wrapper: releases system properties held for the primary KFD context
 * (hsakmt_primary_kfd_ctx) via hsaKmtReleaseSystemPropertiesCtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void)
{
	HSAKMT_STATUS status;

	status = hsaKmtReleaseSystemPropertiesCtx(&hsakmt_primary_kfd_ctx);
	return status;
}
/*
 * Legacy wrapper: fetches the properties of NodeId into *NodeProperties by
 * calling hsaKmtGetNodePropertiesCtx() on the primary KFD context
 * (hsakmt_primary_kfd_ctx).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId,
						HsaNodeProperties *NodeProperties)
{
	HSAKMT_STATUS status;

	status = hsaKmtGetNodePropertiesCtx(&hsakmt_primary_kfd_ctx, NodeId,
					    NodeProperties);
	return status;
}
/*
 * Legacy wrapper: fetches up to NumBanks memory-bank descriptions for NodeId
 * by calling hsaKmtGetNodeMemoryPropertiesCtx() on the primary KFD context
 * (hsakmt_primary_kfd_ctx).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
						      HSAuint32 NumBanks,
						      HsaMemoryProperties *MemoryProperties)
{
	HSAKMT_STATUS status;

	status = hsaKmtGetNodeMemoryPropertiesCtx(&hsakmt_primary_kfd_ctx,
						  NodeId, NumBanks,
						  MemoryProperties);
	return status;
}
/*
 * Legacy wrapper: fetches cache descriptions for NodeId/ProcessorId by
 * calling hsaKmtGetNodeCachePropertiesCtx() on the primary KFD context
 * (hsakmt_primary_kfd_ctx). Arguments are passed through unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties(HSAuint32 NodeId,
						     HSAuint32 ProcessorId,
						     HSAuint32 NumCaches,
						     HsaCacheProperties *CacheProperties)
{
	HSAKMT_STATUS status;

	status = hsaKmtGetNodeCachePropertiesCtx(&hsakmt_primary_kfd_ctx,
						 NodeId, ProcessorId,
						 NumCaches, CacheProperties);
	return status;
}
/*
 * Legacy wrapper: fetches up to NumIoLinks IO-link descriptions for NodeId by
 * calling hsaKmtGetNodeIoLinkPropertiesCtx() on the primary KFD context
 * (hsakmt_primary_kfd_ctx).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId,
						      HSAuint32 NumIoLinks,
						      HsaIoLinkProperties *IoLinkProperties)
{
	HSAKMT_STATUS status;

	status = hsaKmtGetNodeIoLinkPropertiesCtx(&hsakmt_primary_kfd_ctx,
						  NodeId, NumIoLinks,
						  IoLinkProperties);
	return status;
}
@@ -43,7 +43,7 @@ HSAKMT_STATUS hsakmt_init_kfd_version(void)
{
struct kfd_ioctl_get_version_args args = {0};
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_VERSION, &args) == -1)
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_GET_VERSION, &args) == -1)
return HSAKMT_STATUS_ERROR;
hsakmt_kfd_version_info.KernelInterfaceMajorVersion = args.major_version;