libhsakmt: Refactor for Multi-KFD Context Support (Multiple KFD FDs per Process) (#1701)
* Introduce HsaKFDContext structure and infrastructure for multiple KFD contexts, enabling independent contexts within a single process. * Refactor core components (queue, event, FMM, topology) to be context-aware, using explicit HsaKFDContext parameters instead of global state. * Replace global hsakmt_kfd_fd with context-specific file descriptors, ensuring full context isolation. * Maintain backward compatibility by redirecting legacy APIs to use the primary context. This refactoring establishes a foundation for multi-context support while preserving existing functionality. Signed-off-by: Junhua Shen <Junhua.Shen@amd.com>
This commit is contained in:
@@ -130,7 +130,8 @@ set ( HSAKMT_SRC "src/debug.c"
|
||||
"src/version.c"
|
||||
"src/svm.c"
|
||||
"src/pc_sampling.c"
|
||||
"src/ais.c")
|
||||
"src/ais.c"
|
||||
"src/kfdcontext.c")
|
||||
|
||||
## Declare the library target name
|
||||
add_library (${HSAKMT_TARGET} STATIC "")
|
||||
|
||||
@@ -47,7 +47,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAisReadWriteFile(void *MemoryAddress,
|
||||
/* Support is only for dGPUs */
|
||||
|
||||
|
||||
if (!hsakmt_fmm_get_handle(MemoryAddress, &handle, &size_offset)) {
|
||||
if (!hsakmt_fmm_get_handle(&hsakmt_primary_kfd_ctx, MemoryAddress, &handle, &size_offset)) {
|
||||
pr_err("Address/size out of range: %p/%lu\n", MemoryAddress, MemorySizeInBytes);
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
@@ -66,7 +66,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAisReadWriteFile(void *MemoryAddress,
|
||||
}
|
||||
|
||||
args.in.handle_offset = size_offset;
|
||||
ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_AIS_OP, &args);
|
||||
ret = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_AIS_OP, &args);
|
||||
|
||||
if (SizeCopiedInBytes)
|
||||
*SizeCopiedInBytes = args.out.size_copied;
|
||||
|
||||
@@ -78,7 +78,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgRegister(HSAuint32 NodeId)
|
||||
|
||||
args.gpu_id = gpu_id;
|
||||
|
||||
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_REGISTER_DEPRECATED, &args);
|
||||
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_REGISTER_DEPRECATED, &args);
|
||||
|
||||
if (err == 0)
|
||||
result = HSAKMT_STATUS_SUCCESS;
|
||||
@@ -105,7 +105,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgUnregister(HSAuint32 NodeId)
|
||||
struct kfd_ioctl_dbg_unregister_args args = {0};
|
||||
|
||||
args.gpu_id = gpu_id;
|
||||
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED, &args);
|
||||
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED, &args);
|
||||
|
||||
if (err)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
@@ -168,7 +168,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgWavefrontControl(HSAuint32 NodeId,
|
||||
run_ptr += sizeof(DbgWaveMsgRing->MemoryVA);
|
||||
|
||||
/* send to kernel */
|
||||
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED, args);
|
||||
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED, args);
|
||||
|
||||
free(args);
|
||||
|
||||
@@ -256,7 +256,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgAddressWatch(HSAuint32 NodeId,
|
||||
}
|
||||
|
||||
/* send to kernel */
|
||||
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED, args);
|
||||
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED, args);
|
||||
|
||||
free(args);
|
||||
|
||||
@@ -316,7 +316,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
|
||||
((setupTtmp) ? KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK : 0);
|
||||
args.r_debug = (HSAuint64)rDebug;
|
||||
|
||||
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RUNTIME_ENABLE, &args);
|
||||
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RUNTIME_ENABLE, &args);
|
||||
|
||||
if (err) {
|
||||
if (errno == EBUSY)
|
||||
@@ -340,7 +340,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisable(void)
|
||||
memset(&args, 0x00, sizeof(args));
|
||||
args.mode_mask = 0; //Disable
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RUNTIME_ENABLE, &args))
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RUNTIME_ENABLE, &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
@@ -363,7 +363,7 @@ static HSAKMT_STATUS dbg_trap_get_device_data(void *data,
|
||||
args.device_snapshot.entry_size = entry_size;
|
||||
args.op = KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT;
|
||||
args.pid = getpid();
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
*n_entries = args.device_snapshot.num_devices;
|
||||
|
||||
@@ -384,7 +384,7 @@ static HSAKMT_STATUS dbg_trap_get_queue_data(void *data,
|
||||
args.queue_snapshot.snapshot_buf_ptr = (uint64_t) data;
|
||||
args.pid = getpid();
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
*n_entries = args.queue_snapshot.num_queues;
|
||||
@@ -410,7 +410,7 @@ static HSAKMT_STATUS dbg_trap_suspend_queues(uint32_t *queue_ids,
|
||||
args.op = KFD_IOC_DBG_TRAP_SUSPEND_QUEUES;
|
||||
args.pid = getpid();
|
||||
|
||||
r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args);
|
||||
r = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args);
|
||||
if (r < 0)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
@@ -429,7 +429,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
|
||||
CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);
|
||||
*data_size = sizeof(struct kfd_runtime_info);
|
||||
args.enable.rinfo_size = *data_size;
|
||||
args.enable.dbg_fd = hsakmt_kfd_fd;
|
||||
args.enable.dbg_fd = hsakmt_primary_kfd_ctx.fd;
|
||||
*runtime_info = malloc(args.enable.rinfo_size);
|
||||
if (!*runtime_info)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
@@ -437,7 +437,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
|
||||
args.op = KFD_IOC_DBG_TRAP_ENABLE;
|
||||
args.pid = getpid();
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args)) {
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args)) {
|
||||
free(*runtime_info);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
@@ -450,11 +450,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDbgDisable(void)
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
CHECK_KFD_MINOR_VERSION(KFD_MINOR_MIN_DEBUG);
|
||||
args.enable.dbg_fd = hsakmt_kfd_fd;
|
||||
args.enable.dbg_fd = hsakmt_primary_kfd_ctx.fd;
|
||||
args.op = KFD_IOC_DBG_TRAP_DISABLE;
|
||||
args.pid = getpid();
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
@@ -540,7 +540,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *arg
|
||||
free(queue_ids);
|
||||
}
|
||||
|
||||
long err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DBG_TRAP, args);
|
||||
long err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_DBG_TRAP, args);
|
||||
if (DebugReturn)
|
||||
*DebugReturn = err;
|
||||
|
||||
|
||||
@@ -34,12 +34,36 @@
|
||||
#include "hsakmt/linux/kfd_ioctl.h"
|
||||
#include "fmm.h"
|
||||
#include "hsakmt/hsakmtmodel.h"
|
||||
#include <assert.h>
|
||||
|
||||
static HSAuint64 *events_page = NULL;
|
||||
|
||||
void hsakmt_clear_events_page(void)
|
||||
struct hsa_kfd_event_context
|
||||
{
|
||||
events_page = NULL;
|
||||
HSAuint64 *events_page;
|
||||
};
|
||||
|
||||
/*
 * Return the event bookkeeping structure attached to a KFD context,
 * creating it on first use.
 *
 * ctx: KFD context to query; must not be NULL (checked with assert()).
 *
 * Returns ctx->event_context. If it did not exist yet, a zero-filled
 * structure is allocated with calloc() and stored in the context first.
 * When that allocation fails an error is logged and NULL is returned.
 */
struct hsa_kfd_event_context *hsakmt_kfdcontext_get_event_context(HsaKFDContext *ctx)
{
	assert(ctx);

	if (!ctx->event_context) {
		/* First request for this context: allocate lazily */
		ctx->event_context = calloc(1, sizeof(struct hsa_kfd_event_context));

		/* On failure the field stays NULL, which is what gets returned */
		if (!ctx->event_context)
			pr_err("Alloc memory failed for struct hsa_kfd_event_context size %zu\n",
			       sizeof(struct hsa_kfd_event_context));
	}

	return ctx->event_context;
}
|
||||
|
||||
/*
 * Drop the cached events page pointer of a KFD context.
 *
 * Only the pointer stored in the context's event structure is reset to
 * NULL; nothing is unmapped or freed here. If the event structure cannot
 * be obtained the call does nothing.
 */
void hsakmt_clear_events_page(HsaKFDContext *ctx)
{
	struct hsa_kfd_event_context *ectx = hsakmt_kfdcontext_get_event_context(ctx);

	if (!ectx)
		return;

	ectx->events_page = NULL;
}
|
||||
|
||||
static bool IsSystemEventType(HSA_EVENTTYPE type)
|
||||
@@ -48,14 +72,18 @@ static bool IsSystemEventType(HSA_EVENTTYPE type)
|
||||
return (type != HSA_EVENTTYPE_SIGNAL && type != HSA_EVENTTYPE_DEBUG_EVENT);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
|
||||
bool ManualReset, bool IsSignaled,
|
||||
HsaEvent **Event)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEventCtx(HsaKFDContext *ctx,
|
||||
HsaEventDescriptor *EventDesc,
|
||||
bool ManualReset, bool IsSignaled,
|
||||
HsaEvent **Event)
|
||||
{
|
||||
unsigned int event_limit = KFD_SIGNAL_EVENT_LIMIT;
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
struct hsa_kfd_event_context *event_ctx = NULL;
|
||||
HSAuint64 *events_page = NULL;
|
||||
|
||||
if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
@@ -74,9 +102,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
|
||||
|
||||
/* dGPU code */
|
||||
pthread_mutex_lock(&hsakmt_mutex);
|
||||
event_ctx = hsakmt_kfdcontext_get_event_context(ctx);
|
||||
events_page = event_ctx->events_page;
|
||||
|
||||
if (hsakmt_is_dgpu && !events_page) {
|
||||
events_page = hsakmt_allocate_exec_aligned_memory_gpu(
|
||||
events_page = hsakmt_allocate_exec_aligned_memory_gpu(ctx,
|
||||
KFD_SIGNAL_EVENT_LIMIT * 8, PAGE_SIZE, 0, 0, true, false, true);
|
||||
if (!events_page) {
|
||||
free(e);
|
||||
@@ -86,10 +116,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
|
||||
if (hsakmt_use_model)
|
||||
model_set_event_page(events_page, KFD_SIGNAL_EVENT_LIMIT);
|
||||
else
|
||||
hsakmt_fmm_get_handle(events_page, (uint64_t *)&args.event_page_offset, NULL);
|
||||
hsakmt_fmm_get_handle(ctx, events_page, (uint64_t *)&args.event_page_offset, NULL);
|
||||
}
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) {
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) {
|
||||
free(e);
|
||||
*Event = NULL;
|
||||
pthread_mutex_unlock(&hsakmt_mutex);
|
||||
@@ -100,17 +130,17 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
|
||||
|
||||
if (!events_page && args.event_page_offset > 0) {
|
||||
events_page = mmap(NULL, event_limit * 8, PROT_WRITE | PROT_READ,
|
||||
MAP_SHARED, hsakmt_kfd_fd, args.event_page_offset);
|
||||
MAP_SHARED, ctx->fd, args.event_page_offset);
|
||||
if (events_page == MAP_FAILED) {
|
||||
/* old kernels only support 256 events */
|
||||
event_limit = 256;
|
||||
events_page = mmap(NULL, PAGE_SIZE, PROT_WRITE | PROT_READ,
|
||||
MAP_SHARED, hsakmt_kfd_fd, args.event_page_offset);
|
||||
MAP_SHARED, ctx->fd, args.event_page_offset);
|
||||
}
|
||||
if (events_page == MAP_FAILED) {
|
||||
events_page = NULL;
|
||||
pthread_mutex_unlock(&hsakmt_mutex);
|
||||
hsaKmtDestroyEvent(e);
|
||||
hsaKmtDestroyEventCtx(ctx, e);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
}
|
||||
@@ -118,10 +148,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
|
||||
if (args.event_page_offset > 0 && args.event_slot_index < event_limit)
|
||||
e->EventData.HWData2 = (HSAuint64)&events_page[args.event_slot_index];
|
||||
|
||||
pthread_mutex_unlock(&hsakmt_mutex);
|
||||
pthread_mutex_unlock(&hsakmt_mutex);
|
||||
|
||||
e->EventData.EventType = EventDesc->EventType;
|
||||
e->EventData.HWData1 = args.event_id;
|
||||
e->EventData.EventType = EventDesc->EventType;
|
||||
e->EventData.HWData1 = args.event_id;
|
||||
|
||||
e->EventData.HWData3 = args.event_trigger_data;
|
||||
e->EventData.EventData.SyncVar.SyncVar.UserData =
|
||||
@@ -134,19 +164,21 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
|
||||
|
||||
set_args.event_id = args.event_id;
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_EVENT,
|
||||
&set_args) != 0) {
|
||||
hsaKmtDestroyEvent(e);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
}
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_EVENT, &set_args) != 0) {
|
||||
hsaKmtDestroyEventCtx(ctx, e);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
*Event = e;
|
||||
*Event = e;
|
||||
if (!event_ctx->events_page)
|
||||
event_ctx->events_page = events_page;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(HsaEvent *Event)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEventCtx(HsaKFDContext *ctx,
|
||||
HsaEvent *Event)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
@@ -157,14 +189,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(HsaEvent *Event)
|
||||
|
||||
args.event_id = Event->EventId;
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DESTROY_EVENT, &args) != 0)
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DESTROY_EVENT, &args) != 0)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
free(Event);
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(HsaEvent *Event)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtSetEventCtx(HsaKFDContext *ctx,
|
||||
HsaEvent *Event)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
@@ -181,13 +214,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(HsaEvent *Event)
|
||||
|
||||
args.event_id = Event->EventId;
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_EVENT, &args) == -1)
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_EVENT, &args) == -1)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(HsaEvent *Event)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtResetEventCtx(HsaKFDContext *ctx,
|
||||
HsaEvent *Event)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
@@ -204,13 +238,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(HsaEvent *Event)
|
||||
|
||||
args.event_id = Event->EventId;
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RESET_EVENT, &args) == -1)
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_RESET_EVENT, &args) == -1)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventStateCtx(HsaKFDContext *ctx,
|
||||
HsaEvent *Event)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
@@ -220,22 +255,25 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event)
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent(HsaEvent *Event,
|
||||
HSAuint32 Milliseconds)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEventCtx(HsaKFDContext *ctx,
|
||||
HsaEvent *Event,
|
||||
HSAuint32 Milliseconds)
|
||||
{
|
||||
return hsaKmtWaitOnEvent_Ext(Event, Milliseconds, NULL);
|
||||
return hsaKmtWaitOnEvent_ExtCtx(ctx, Event, Milliseconds, NULL);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent_Ext(HsaEvent *Event,
|
||||
HSAuint32 Milliseconds, uint64_t *event_age)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent_ExtCtx(HsaKFDContext *ctx,
|
||||
HsaEvent *Event,
|
||||
HSAuint32 Milliseconds, uint64_t *event_age)
|
||||
{
|
||||
if (!Event)
|
||||
return HSAKMT_STATUS_INVALID_HANDLE;
|
||||
|
||||
return hsaKmtWaitOnMultipleEvents_Ext(&Event, 1, true, Milliseconds, event_age);
|
||||
return hsaKmtWaitOnMultipleEvents_ExtCtx(ctx, &Event,
|
||||
1, true, Milliseconds, event_age);
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS get_mem_info_svm_api(uint64_t address, uint32_t gpu_id)
|
||||
static HSAKMT_STATUS get_mem_info_svm_api(HsaKFDContext *ctx, uint64_t address, uint32_t gpu_id)
|
||||
{
|
||||
struct kfd_ioctl_svm_args *args;
|
||||
uint32_t node_id = 0;
|
||||
@@ -258,7 +296,7 @@ static HSAKMT_STATUS get_mem_info_svm_api(uint64_t address, uint32_t gpu_id)
|
||||
args->op = KFD_IOCTL_SVM_OP_GET_ATTR;
|
||||
args->nattr = s_attr / sizeof(*attrs);
|
||||
memcpy(args->attrs, attrs, s_attr);
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args)) {
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args)) {
|
||||
pr_debug("op get range attrs failed %s\n", strerror(errno));
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
@@ -312,8 +350,8 @@ static HSAKMT_STATUS get_mem_info_svm_api(uint64_t address, uint32_t gpu_id)
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
//Analyze memory exception data, print debug messages
|
||||
static void analysis_memory_exception(struct kfd_hsa_memory_exception_data *
|
||||
memory_exception_data)
|
||||
static void analysis_memory_exception(HsaKFDContext *ctx,
|
||||
struct kfd_hsa_memory_exception_data *memory_exception_data)
|
||||
{
|
||||
HSAKMT_STATUS ret;
|
||||
HsaPointerInfo info;
|
||||
@@ -331,9 +369,9 @@ static void analysis_memory_exception(struct kfd_hsa_memory_exception_data *
|
||||
else if (memory_exception_data->failure.NoExecute)
|
||||
pr_err("Execute to none-executable page\n");
|
||||
|
||||
ret = hsakmt_fmm_get_mem_info((const void *)addr, &info);
|
||||
ret = hsakmt_fmm_get_mem_info(ctx, (const void *)addr, &info);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
ret = get_mem_info_svm_api(addr, memory_exception_data->gpu_id);
|
||||
ret = get_mem_info_svm_api(ctx, addr, memory_exception_data->gpu_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
pr_err("Address does not belong to a known buffer\n");
|
||||
return;
|
||||
@@ -378,19 +416,22 @@ static void analysis_memory_exception(struct kfd_hsa_memory_exception_data *
|
||||
}
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(HsaEvent *Events[],
|
||||
HSAuint32 NumEvents,
|
||||
bool WaitOnAll,
|
||||
HSAuint32 Milliseconds)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEventsCtx(HsaKFDContext *ctx,
|
||||
HsaEvent *Events[],
|
||||
HSAuint32 NumEvents,
|
||||
bool WaitOnAll,
|
||||
HSAuint32 Milliseconds)
|
||||
{
|
||||
return hsaKmtWaitOnMultipleEvents_Ext(Events, NumEvents, WaitOnAll, Milliseconds, NULL);
|
||||
return hsaKmtWaitOnMultipleEvents_ExtCtx(ctx, Events,
|
||||
NumEvents, WaitOnAll, Milliseconds, NULL);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
|
||||
HSAuint32 NumEvents,
|
||||
bool WaitOnAll,
|
||||
HSAuint32 Milliseconds,
|
||||
uint64_t *event_age)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_ExtCtx(HsaKFDContext *ctx,
|
||||
HsaEvent *Events[],
|
||||
HSAuint32 NumEvents,
|
||||
bool WaitOnAll,
|
||||
HSAuint32 Milliseconds,
|
||||
uint64_t *event_age)
|
||||
{
|
||||
HSAKMT_STATUS result;
|
||||
CHECK_KFD_OPEN();
|
||||
@@ -417,7 +458,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
|
||||
args.num_events = NumEvents;
|
||||
args.events_ptr = (uint64_t)(uintptr_t)event_data;
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1)
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1)
|
||||
result = HSAKMT_STATUS_ERROR;
|
||||
else if (args.wait_result == KFD_IOC_WAIT_RESULT_TIMEOUT)
|
||||
result = HSAKMT_STATUS_WAIT_TIMEOUT;
|
||||
@@ -438,7 +479,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
|
||||
Events[i]->EventData.EventData.MemoryAccessFault.Failure.ECC =
|
||||
((event_data[i].memory_exception_data.ErrorType == 1) || (event_data[i].memory_exception_data.ErrorType == 2)) ? 1 : 0;
|
||||
Events[i]->EventData.EventData.MemoryAccessFault.Flags = HSA_EVENTID_MEMORY_FATAL_PROCESS;
|
||||
analysis_memory_exception(&event_data[i].memory_exception_data);
|
||||
analysis_memory_exception(ctx, &event_data[i].memory_exception_data);
|
||||
} else if (Events[i]->EventData.EventType == HSA_EVENTTYPE_HW_EXCEPTION &&
|
||||
event_data[i].hw_exception_data.gpu_id) {
|
||||
|
||||
@@ -464,7 +505,7 @@ out:
|
||||
return result;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMICtx(HsaKFDContext *ctx, HSAuint32 NodeId, int *fd)
|
||||
{
|
||||
struct kfd_ioctl_smi_events_args args;
|
||||
HSAKMT_STATUS result;
|
||||
@@ -481,7 +522,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
|
||||
}
|
||||
|
||||
args.gpuid = gpuid;
|
||||
result = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SMI_EVENTS, &args);
|
||||
result = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SMI_EVENTS, &args);
|
||||
if (result) {
|
||||
pr_debug("open SMI event fd failed %s\n", strerror(errno));
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
@@ -490,3 +531,73 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
|
||||
*fd = args.anon_fd;
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Legacy (context-less) entry point: creates the event on the primary
 * KFD context by forwarding every argument to hsaKmtCreateEventCtx()
 * and returning its status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
					  bool ManualReset, bool IsSignaled,
					  HsaEvent **Event)
{
	HSAKMT_STATUS status;

	status = hsaKmtCreateEventCtx(&hsakmt_primary_kfd_ctx, EventDesc,
				      ManualReset, IsSignaled, Event);
	return status;
}
|
||||
|
||||
/*
 * Legacy (context-less) entry point: destroys the event through the
 * primary KFD context via hsaKmtDestroyEventCtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(HsaEvent *Event)
{
	HSAKMT_STATUS status;

	status = hsaKmtDestroyEventCtx(&hsakmt_primary_kfd_ctx, Event);
	return status;
}
|
||||
|
||||
/*
 * Legacy (context-less) entry point: signals the event through the
 * primary KFD context via hsaKmtSetEventCtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(HsaEvent *Event)
{
	HSAKMT_STATUS status;

	status = hsaKmtSetEventCtx(&hsakmt_primary_kfd_ctx, Event);
	return status;
}
|
||||
|
||||
/*
 * Legacy (context-less) entry point: resets the event through the
 * primary KFD context via hsaKmtResetEventCtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(HsaEvent *Event)
{
	HSAKMT_STATUS status;

	status = hsaKmtResetEventCtx(&hsakmt_primary_kfd_ctx, Event);
	return status;
}
|
||||
|
||||
/*
 * Legacy (context-less) entry point: queries the event state through
 * the primary KFD context via hsaKmtQueryEventStateCtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event)
{
	HSAKMT_STATUS status;

	status = hsaKmtQueryEventStateCtx(&hsakmt_primary_kfd_ctx, Event);
	return status;
}
|
||||
|
||||
/*
 * Legacy (context-less) wait on a single event.
 *
 * Forwards to hsaKmtWaitOnEvent_Ext() with a NULL event_age pointer and
 * returns its status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent(HsaEvent *Event,
					  HSAuint32 Milliseconds)
{
	HSAKMT_STATUS status;

	status = hsaKmtWaitOnEvent_Ext(Event, Milliseconds, NULL);
	return status;
}
|
||||
|
||||
/*
 * Legacy (context-less) wait on a single event, with event age.
 *
 * A NULL Event yields HSAKMT_STATUS_INVALID_HANDLE. Otherwise the event
 * is passed as a one-element list to hsaKmtWaitOnMultipleEvents_Ext()
 * with WaitOnAll set to true, and that call's status is returned.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent_Ext(HsaEvent *Event,
					      HSAuint32 Milliseconds, uint64_t *event_age)
{
	return Event
		? hsaKmtWaitOnMultipleEvents_Ext(&Event, 1, true,
						 Milliseconds, event_age)
		: HSAKMT_STATUS_INVALID_HANDLE;
}
|
||||
|
||||
/*
 * Legacy (context-less) wait on a list of events.
 *
 * Forwards to hsaKmtWaitOnMultipleEvents_Ext() with a NULL event_age
 * pointer and returns its status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(HsaEvent *Events[],
						   HSAuint32 NumEvents,
						   bool WaitOnAll,
						   HSAuint32 Milliseconds)
{
	HSAKMT_STATUS status;

	status = hsaKmtWaitOnMultipleEvents_Ext(Events, NumEvents, WaitOnAll,
						Milliseconds, NULL);
	return status;
}
|
||||
|
||||
/*
 * Legacy (context-less) wait on a list of events, with event ages.
 *
 * Runs the wait on the primary KFD context by forwarding every argument
 * to hsaKmtWaitOnMultipleEvents_ExtCtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
						       HSAuint32 NumEvents,
						       bool WaitOnAll,
						       HSAuint32 Milliseconds,
						       uint64_t *event_age)
{
	HSAKMT_STATUS status;

	status = hsaKmtWaitOnMultipleEvents_ExtCtx(&hsakmt_primary_kfd_ctx,
						   Events, NumEvents, WaitOnAll,
						   Milliseconds, event_age);
	return status;
}
|
||||
|
||||
/*
 * Legacy (context-less) entry point: opens an SMI event file descriptor
 * for NodeId through the primary KFD context via hsaKmtOpenSMICtx().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
{
	HSAKMT_STATUS status;

	status = hsaKmtOpenSMICtx(&hsakmt_primary_kfd_ctx, NodeId, fd);
	return status;
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -45,59 +45,113 @@ typedef struct {
|
||||
void *start_address;
|
||||
} aperture_properties_t;
|
||||
|
||||
HSAKMT_STATUS hsakmt_fmm_get_amdgpu_device_handle(uint32_t node_id, HsaAMDGPUDeviceHandle *DeviceHandle);
|
||||
HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes);
|
||||
void hsakmt_fmm_destroy_process_apertures(void);
|
||||
HSAKMT_STATUS hsakmt_fmm_get_amdgpu_device_handle(HsaKFDContext *ctx,
|
||||
uint32_t node_id, HsaAMDGPUDeviceHandle *DeviceHandle);
|
||||
HSAKMT_STATUS hsakmt_fmm_init_process_apertures(HsaKFDContext *ctx, unsigned int NumNodes);
|
||||
void hsakmt_fmm_destroy_process_apertures(HsaKFDContext *ctx);
|
||||
|
||||
/* Memory interface */
|
||||
void *hsakmt_fmm_allocate_scratch(uint32_t gpu_id, void *address, uint64_t MemorySizeInBytes);
|
||||
void *hsakmt_fmm_allocate_device(uint32_t gpu_id, uint32_t node_id, void *address,
|
||||
uint64_t MemorySizeInBytes, uint64_t alignment, HsaMemFlags flags);
|
||||
void *hsakmt_fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes, uint64_t doorbell_offset);
|
||||
void *hsakmt_fmm_allocate_host(uint32_t gpu_id, uint32_t node_id, void *address, uint64_t MemorySizeInBytes,
|
||||
uint64_t alignment, HsaMemFlags flags);
|
||||
void hsakmt_fmm_print(uint32_t node);
|
||||
HSAKMT_STATUS hsakmt_fmm_release(void *address);
|
||||
HSAKMT_STATUS hsakmt_fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address);
|
||||
int hsakmt_fmm_unmap_from_gpu(void *address);
|
||||
bool hsakmt_fmm_get_handle(void *address, uint64_t *handle, uint64_t *size_offset);
|
||||
HSAKMT_STATUS hsakmt_fmm_get_mem_info(const void *address, HsaPointerInfo *info);
|
||||
HSAKMT_STATUS hsakmt_fmm_set_mem_user_data(const void *mem, void *usr_data);
|
||||
// Memory allocation/free functions
|
||||
void *hsakmt_fmm_allocate_scratch(HsaKFDContext *ctx,
|
||||
uint32_t gpu_id,
|
||||
void *address,
|
||||
uint64_t MemorySizeInBytes);
|
||||
|
||||
void *hsakmt_fmm_allocate_device(HsaKFDContext *ctx,
|
||||
uint32_t gpu_id,
|
||||
uint32_t node_id,
|
||||
void *address,
|
||||
uint64_t MemorySizeInBytes,
|
||||
uint64_t alignment,
|
||||
HsaMemFlags flags);
|
||||
|
||||
void *hsakmt_fmm_allocate_host(HsaKFDContext *ctx,
|
||||
uint32_t gpu_id,
|
||||
uint32_t node_id,
|
||||
void *address,
|
||||
uint64_t MemorySizeInBytes,
|
||||
uint64_t alignment,
|
||||
HsaMemFlags flags);
|
||||
|
||||
void *hsakmt_fmm_allocate_doorbell(HsaKFDContext *ctx,
|
||||
uint32_t gpu_id,
|
||||
uint64_t MemorySizeInBytes,
|
||||
uint64_t doorbell_offset);
|
||||
|
||||
void hsakmt_fmm_print(HsaKFDContext *ctx, uint32_t node);
|
||||
HSAKMT_STATUS hsakmt_fmm_release(HsaKFDContext *ctx, void *address);
|
||||
|
||||
// Memory mmap/munmap functions
|
||||
HSAKMT_STATUS hsakmt_fmm_map_to_gpu(HsaKFDContext *ctx,
|
||||
void *address,
|
||||
uint64_t size,
|
||||
uint64_t *gpuvm_address);
|
||||
|
||||
HSAKMT_STATUS hsakmt_fmm_map_to_gpu_nodes(HsaKFDContext *ctx,
|
||||
void *address,
|
||||
uint64_t size,
|
||||
uint32_t *nodes_to_map,
|
||||
uint64_t num_of_nodes,
|
||||
uint64_t *gpuvm_address);
|
||||
|
||||
int hsakmt_fmm_unmap_from_gpu(HsaKFDContext *ctx, void *address);
|
||||
|
||||
// Memory register/deregister functions
|
||||
HSAKMT_STATUS hsakmt_fmm_register_memory(HsaKFDContext *ctx,
|
||||
void *address, uint64_t size_in_bytes,
|
||||
uint32_t *gpu_id_array,
|
||||
uint32_t gpu_id_array_size,
|
||||
HsaMemFlags flags);
|
||||
|
||||
HSAKMT_STATUS hsakmt_fmm_register_graphics_handle(HsaKFDContext *ctx,
|
||||
HSAuint64 GraphicsResourceHandle,
|
||||
HsaGraphicsResourceInfo *GraphicsResourceInfo,
|
||||
uint32_t *gpu_id_array,
|
||||
uint32_t gpu_id_array_size,
|
||||
HSA_REGISTER_MEM_FLAGS RegisterFlags);
|
||||
|
||||
HSAKMT_STATUS hsakmt_fmm_deregister_memory(HsaKFDContext *ctx, void *address);
|
||||
|
||||
// Memory export functions
|
||||
HSAKMT_STATUS hsakmt_fmm_export_dma_buf_fd(HsaKFDContext *ctx,
|
||||
void *MemoryAddress,
|
||||
HSAuint64 MemorySizeInBytes,
|
||||
int *DMABufFd,
|
||||
HSAuint64 *Offset);
|
||||
|
||||
HSAKMT_STATUS hsakmt_fmm_share_memory(HsaKFDContext *ctx,
|
||||
void *MemoryAddress,
|
||||
HSAuint64 SizeInBytes,
|
||||
HsaSharedMemoryHandle *SharedMemoryHandle);
|
||||
|
||||
HSAKMT_STATUS hsakmt_fmm_register_shared_memory(HsaKFDContext *ctx,
|
||||
const HsaSharedMemoryHandle *SharedMemoryHandle,
|
||||
HSAuint64 *SizeInBytes,
|
||||
void **MemoryAddress,
|
||||
uint32_t *gpu_id_array,
|
||||
uint32_t gpu_id_array_size);
|
||||
|
||||
bool hsakmt_fmm_get_handle(HsaKFDContext *ctx,
|
||||
void *address,
|
||||
uint64_t *handle,
|
||||
uint64_t *size_offset);
|
||||
HSAKMT_STATUS hsakmt_fmm_get_mem_info(HsaKFDContext *ctx,
|
||||
const void *address,
|
||||
HsaPointerInfo *info);
|
||||
HSAKMT_STATUS hsakmt_fmm_set_mem_user_data(HsaKFDContext *ctx,
|
||||
const void *mem,
|
||||
void *usr_data);
|
||||
#ifdef SANITIZER_AMDGPU
|
||||
HSAKMT_STATUS hsakmt_fmm_replace_asan_header_page(void* address);
|
||||
HSAKMT_STATUS hsakmt_fmm_return_asan_header_page(void* address);
|
||||
HSAKMT_STATUS hsakmt_fmm_replace_asan_header_page(HsaKFDContext *ctx, void* address);
|
||||
HSAKMT_STATUS hsakmt_fmm_return_asan_header_page(HsaKFDContext *ctx, void* address);
|
||||
#endif
|
||||
|
||||
/* Topology interface*/
|
||||
HSAKMT_STATUS hsakmt_fmm_get_aperture_base_and_limit(aperture_type_e aperture_type, HSAuint32 gpu_id,
|
||||
HSAKMT_STATUS hsakmt_fmm_get_aperture_base_and_limit(HsaKFDContext *ctx,
|
||||
aperture_type_e aperture_type, HSAuint32 gpu_id,
|
||||
HSAuint64 *aperture_base, HSAuint64 *aperture_limit);
|
||||
|
||||
HSAKMT_STATUS hsakmt_fmm_register_memory(void *address, uint64_t size_in_bytes,
|
||||
uint32_t *gpu_id_array,
|
||||
uint32_t gpu_id_array_size,
|
||||
HsaMemFlags flags);
|
||||
HSAKMT_STATUS hsakmt_fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle,
|
||||
HsaGraphicsResourceInfo *GraphicsResourceInfo,
|
||||
uint32_t *gpu_id_array,
|
||||
uint32_t gpu_id_array_size,
|
||||
HSA_REGISTER_MEM_FLAGS RegisterFlags);
|
||||
HSAKMT_STATUS hsakmt_fmm_deregister_memory(void *address);
|
||||
HSAKMT_STATUS hsakmt_fmm_export_dma_buf_fd(void *MemoryAddress,
|
||||
HSAuint64 MemorySizeInBytes,
|
||||
int *DMABufFd,
|
||||
HSAuint64 *Offset);
|
||||
HSAKMT_STATUS hsakmt_fmm_share_memory(void *MemoryAddress,
|
||||
HSAuint64 SizeInBytes,
|
||||
HsaSharedMemoryHandle *SharedMemoryHandle);
|
||||
HSAKMT_STATUS hsakmt_fmm_register_shared_memory(const HsaSharedMemoryHandle *SharedMemoryHandle,
|
||||
HSAuint64 *SizeInBytes,
|
||||
void **MemoryAddress,
|
||||
uint32_t *gpu_id_array,
|
||||
uint32_t gpu_id_array_size);
|
||||
HSAKMT_STATUS hsakmt_fmm_map_to_gpu_nodes(void *address, uint64_t size,
|
||||
uint32_t *nodes_to_map, uint64_t num_of_nodes, uint64_t *gpuvm_address);
|
||||
|
||||
int hsakmt_open_drm_render_device(int minor);
|
||||
int hsakmt_open_drm_render_device(HsaKFDContext *ctx, int minor);
|
||||
void *hsakmt_mmap_allocate_aligned(int prot, int flags, uint64_t size, uint64_t align,
|
||||
uint64_t guard_size, void *aper_base, void *aper_limit, int fd);
|
||||
|
||||
|
||||
@@ -27,10 +27,8 @@
|
||||
|
||||
// HSAKMT global data
|
||||
|
||||
int hsakmt_kfd_fd = -1;
|
||||
int hsakmt_udmabuf_dev_fd = -1;
|
||||
unsigned long hsakmt_kfd_open_count;
|
||||
unsigned long hsakmt_system_properties_count;
|
||||
pthread_mutex_t hsakmt_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
bool hsakmt_is_dgpu;
|
||||
|
||||
|
||||
@@ -0,0 +1,827 @@
|
||||
/*
|
||||
* Copyright © 2025 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including
|
||||
* the next paragraph) shall be included in all copies or substantial
|
||||
* portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _HSAKMTCTX_H_
|
||||
#define _HSAKMTCTX_H_
|
||||
|
||||
#include "hsakmt/hsakmttypes.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _HsaKFDContext HsaKFDContext;
|
||||
|
||||
/**
|
||||
The context-aware version for opening the KFD device.
|
||||
|
||||
"Opens" the HSA kernel driver for user-kernel mode communication.
|
||||
|
||||
On Windows, this function gets a handle to the KFD's AMDKFDIO device object that
|
||||
is responsible for user-kernel communication, this handle is used internally by
|
||||
the thunk library to send device I/O control to the HSA kernel driver.
|
||||
No other thunk library function may be called unless the user-kernel communication
|
||||
channel is opened first.
|
||||
|
||||
On Linux this call opens the "/dev/kfd" device file to establish a communication
|
||||
path to the kernel.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtOpenKFDCtx(
|
||||
HsaKFDContext **pCtx //IN/OUT
|
||||
);
|
||||
|
||||
/**
|
||||
The context-aware version for closing the kfd device.
|
||||
|
||||
"Closes" the user-kernel communication path.
|
||||
|
||||
On Windows, the handle obtained by the hsaKmtOpenKFDCtx() function is closed;
|
||||
no other communication with the kernel driver is possible after the successful
|
||||
execution of the hsaKmtCloseKFDCtx() function. Depending on the failure reason,
|
||||
the user-kernel communication path may or may not be still active.
|
||||
|
||||
On Linux the function closes the "/dev/kfd" device file.
|
||||
No further communication to the kernel driver is allowed until hsaKmtOpenKFDCtx()
|
||||
function is called again.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtCloseKFDCtx( void );
|
||||
|
||||
/**
|
||||
The function takes a "snapshot" of the topology information within the KFD
|
||||
to avoid any changes during the enumeration process.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtAcquireSystemPropertiesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HsaSystemProperties* SystemProperties //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Releases the topology "snapshot" taken by hsaKmtAcquireSystemPropertiesCtx()
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtReleaseSystemPropertiesCtx(
|
||||
HsaKFDContext *ctx //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Retrieves the discoverable sub-properties for a given HSA
|
||||
node. The parameters returned allow the application or runtime to size the
|
||||
management structures necessary to store the information.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetNodePropertiesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HsaNodeProperties* NodeProperties //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Retrieves the memory properties of a specific HSA node.
|
||||
The memory pointer passed as MemoryProperties is sized as
|
||||
NumBanks * sizeof(HsaMemoryProperties). NumBanks is retrieved with the
|
||||
hsaKmtGetNodePropertiesCtx() call.
|
||||
|
||||
Some of the data returned is optional. Not all implementations may return all
|
||||
parameters in the hsaMemoryProperties.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetNodeMemoryPropertiesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint32 NumBanks, //IN
|
||||
HsaMemoryProperties* MemoryProperties //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Retrieves the cache properties of a specific HSA node and processor ID.
|
||||
ProcessorID refers to either a CPU core or a SIMD unit as enumerated earlier
|
||||
via the hsaKmtGetNodePropertiesCtx() call.
|
||||
The memory pointer passed as CacheProperties is sized as
|
||||
NumCaches * sizeof(HsaCacheProperties). NumCaches is retrieved with the
|
||||
hsaKmtGetNodePropertiesCtx() call.
|
||||
|
||||
The data returned is optional. Not all implementations may return all
|
||||
parameters in the CacheProperties.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetNodeCachePropertiesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint32 ProcessorId, //IN
|
||||
HSAuint32 NumCaches, //IN
|
||||
HsaCacheProperties* CacheProperties //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Retrieves the HSA IO affinity properties of a specific HSA node.
|
||||
The memory pointer passed as IoLinkProperties is sized as
|
||||
NumIoLinks * sizeof(HsaIoLinkProperties). NumIoLinks is retrieved with the
|
||||
hsaKmtGetNodePropertiesCtx() call.
|
||||
|
||||
The data returned is optional. Not all implementations may return all
|
||||
parameters in the IoLinkProperties.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetNodeIoLinkPropertiesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint32 NumIoLinks, //IN
|
||||
HsaIoLinkProperties* IoLinkProperties //OUT
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
Creates an operating system event associated with a HSA event ID
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtCreateEventCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HsaEventDescriptor* EventDesc, //IN
|
||||
bool ManualReset, //IN
|
||||
bool IsSignaled, //IN
|
||||
HsaEvent** Event //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Destroys an operating system event associated with a HSA event ID
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDestroyEventCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HsaEvent* Event //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Sets the specified event object to the signaled state
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSetEventCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HsaEvent* Event //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Sets the specified event object to the non-signaled state
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtResetEventCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HsaEvent* Event //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Queries the state of the specified event object
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtQueryEventStateCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HsaEvent* Event //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Checks the current state of the event object. If the object's state is
|
||||
nonsignaled, the calling thread enters the wait state.
|
||||
|
||||
The function returns when one of the following occurs:
|
||||
- The specified event object is in the signaled state.
|
||||
- The time-out interval elapses.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtWaitOnEventCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HsaEvent* Event, //IN
|
||||
HSAuint32 Milliseconds //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Checks the current state of the event object. If the object's state is
|
||||
nonsignaled, the calling thread enters the wait state. event_age can
|
||||
help avoid race conditions.
|
||||
|
||||
The function returns when one of the following occurs:
|
||||
- The specified event object is in the signaled state.
|
||||
- The time-out interval elapses.
|
||||
- Tracking event age
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtWaitOnEvent_ExtCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HsaEvent* Event, //IN
|
||||
HSAuint32 Milliseconds, //IN
|
||||
uint64_t *event_age //IN/OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Checks the current state of multiple event objects.
|
||||
|
||||
The function returns when one of the following occurs:
|
||||
- Either any one or all of the specified objects are in the signaled state
|
||||
- if "WaitOnAll" is "true" the function returns when the state of all
|
||||
objects in array is signaled
|
||||
- if "WaitOnAll" is "false" the function returns when the state of any
|
||||
one of the objects is set to signaled
|
||||
- The time-out interval elapses.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtWaitOnMultipleEventsCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HsaEvent* Events[], //IN
|
||||
HSAuint32 NumEvents, //IN
|
||||
bool WaitOnAll, //IN
|
||||
HSAuint32 Milliseconds //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Checks the current state of multiple event objects.
|
||||
event_age can help avoid race conditions.
|
||||
|
||||
The function returns when one of the following occurs:
|
||||
- Either any one or all of the specified objects are in the signaled state
|
||||
- if "WaitOnAll" is "true" the function returns when the state of all
|
||||
objects in array is signaled
|
||||
- if "WaitOnAll" is "false" the function returns when the state of any
|
||||
one of the objects is set to signaled
|
||||
- The time-out interval elapses.
|
||||
- Tracking event age
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtWaitOnMultipleEvents_ExtCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HsaEvent* Events[], //IN
|
||||
HSAuint32 NumEvents, //IN
|
||||
bool WaitOnAll, //IN
|
||||
HSAuint32 Milliseconds, //IN
|
||||
uint64_t *event_age //IN/OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Creates a GPU queue with user-mode access rights
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtCreateQueueCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HSA_QUEUE_TYPE Type, //IN
|
||||
HSAuint32 QueuePercentage, //IN
|
||||
HSA_QUEUE_PRIORITY Priority, //IN
|
||||
void* QueueAddress, //IN
|
||||
HSAuint64 QueueSizeInBytes, //IN
|
||||
HsaEvent* Event, //IN
|
||||
HsaQueueResource* QueueResource //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Creates a GPU queue with user-mode access rights
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtCreateQueueExtCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HSA_QUEUE_TYPE Type, //IN
|
||||
HSAuint32 QueuePercentage, //IN
|
||||
HSA_QUEUE_PRIORITY Priority, //IN
|
||||
HSAuint32 SdmaEngineId, //IN
|
||||
void* QueueAddress, //IN
|
||||
HSAuint64 QueueSizeInBytes, //IN
|
||||
HsaEvent* Event, //IN
|
||||
HsaQueueResource* QueueResource //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Updates a queue
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtUpdateQueueCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSA_QUEUEID QueueId, //IN
|
||||
HSAuint32 QueuePercentage, //IN
|
||||
HSA_QUEUE_PRIORITY Priority, //IN
|
||||
void* QueueAddress, //IN
|
||||
HSAuint64 QueueSize, //IN
|
||||
HsaEvent* Event //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Destroys a queue
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDestroyQueueCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSA_QUEUEID QueueId //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Set cu mask for a queue
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSetQueueCUMaskCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSA_QUEUEID QueueId, //IN
|
||||
HSAuint32 CUMaskCount, //IN
|
||||
HSAuint32* QueueCUMask //IN
|
||||
);
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetQueueInfoCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSA_QUEUEID QueueId, //IN
|
||||
HsaQueueInfo *QueueInfo //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Allows an HSA process to set/change the default and alternate memory coherency, before starting to dispatch.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSetMemoryPolicyCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 Node, //IN
|
||||
HSAuint32 DefaultPolicy, //IN
|
||||
HSAuint32 AlternatePolicy, //IN
|
||||
void* MemoryAddressAlternate, //IN (page-aligned)
|
||||
HSAuint64 MemorySizeInBytes //IN (page-aligned)
|
||||
);
|
||||
|
||||
/**
|
||||
Allocates a memory buffer that may be accessed by the GPU
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtAllocMemoryCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 PreferredNode, //IN
|
||||
HSAuint64 SizeInBytes, //IN (multiple of page size)
|
||||
HsaMemFlags MemFlags, //IN
|
||||
void** MemoryAddress //IN/OUT (page-aligned)
|
||||
);
|
||||
|
||||
/**
|
||||
Allocates a memory buffer with specific alignment that may be accessed by the GPU
|
||||
If Alignment is 0, the smallest possible alignment will be used
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtAllocMemoryAlignCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 PreferredNode, //IN
|
||||
HSAuint64 SizeInBytes, //IN (multiple of page size)
|
||||
HSAuint64 Alignment, //IN (power of 2 and >= page size)
|
||||
HsaMemFlags MemFlags, //IN
|
||||
void** MemoryAddress //IN/OUT (page-aligned)
|
||||
);
|
||||
|
||||
/**
|
||||
Frees a memory buffer
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtFreeMemoryCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void* MemoryAddress, //IN (page-aligned)
|
||||
HSAuint64 SizeInBytes //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Inquires memory available for allocation as a memory buffer
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtAvailableMemoryCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 Node, //IN
|
||||
HSAuint64 *AvailableBytes //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Registers with KFD a memory buffer that may be accessed by the GPU
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRegisterMemoryCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void* MemoryAddress, //IN (cache-aligned)
|
||||
HSAuint64 MemorySizeInBytes //IN (cache-aligned)
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
Registers with KFD a memory buffer that may be accessed by specific GPUs
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRegisterMemoryToNodesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void *MemoryAddress, //IN (cache-aligned)
|
||||
HSAuint64 MemorySizeInBytes, //IN (cache-aligned)
|
||||
HSAuint64 NumberOfNodes, //IN
|
||||
HSAuint32* NodeArray //IN
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
Registers with KFD a memory buffer with memory attributes
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRegisterMemoryWithFlagsCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void *MemoryAddress, //IN (cache-aligned)
|
||||
HSAuint64 MemorySizeInBytes, //IN (cache-aligned)
|
||||
HsaMemFlags MemFlags //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Registers with KFD a graphics buffer and returns graphics metadata
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRegisterGraphicsHandleToNodesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint64 GraphicsResourceHandle, //IN
|
||||
HsaGraphicsResourceInfo *GraphicsResourceInfo, //OUT
|
||||
HSAuint64 NumberOfNodes, //IN
|
||||
HSAuint32* NodeArray //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Similar to hsaKmtRegisterGraphicsHandleToNodes but provides registration
|
||||
options via RegisterFlags.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRegisterGraphicsHandleToNodesExtCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint64 GraphicsResourceHandle, //IN
|
||||
HsaGraphicsResourceInfo *GraphicsResourceInfo, //OUT
|
||||
HSAuint64 NumberOfNodes, //IN
|
||||
HSAuint32* NodeArray, //IN
|
||||
HSA_REGISTER_MEM_FLAGS RegisterFlags //IN
|
||||
);
|
||||
|
||||
/**
|
||||
* Export a dmabuf handle and offset for a given memory address
|
||||
*
|
||||
* Validates that @MemoryAddress belongs to a valid allocation and that the
|
||||
* @MemorySizeInBytes doesn't exceed the end of that allocation. Returns a
|
||||
* dmabuf fd of the allocation and the offset of MemoryAddress within that
|
||||
* allocation. The memory will remain allocated even after the allocation is
|
||||
* freed by hsaKmtFreeMemory for as long as a dmabuf fd remains open or any
|
||||
* importer of that fd maintains an active reference to the memory.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtExportDMABufHandleCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void *MemoryAddress, //IN
|
||||
HSAuint64 MemorySizeInBytes, //IN
|
||||
int *DMABufFd, //OUT
|
||||
HSAuint64 *Offset //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Export a memory buffer for sharing with other processes
|
||||
|
||||
NOTE: for the current revision of the thunk spec, SizeInBytes
|
||||
must match whole allocation.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtShareMemoryCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void *MemoryAddress, //IN
|
||||
HSAuint64 SizeInBytes, //IN
|
||||
HsaSharedMemoryHandle *SharedMemoryHandle //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Register shared memory handle
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRegisterSharedHandleCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
const HsaSharedMemoryHandle *SharedMemoryHandle, //IN
|
||||
void **MemoryAddress, //OUT
|
||||
HSAuint64 *SizeInBytes //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Register shared memory handle to specific nodes only
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRegisterSharedHandleToNodesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
const HsaSharedMemoryHandle *SharedMemoryHandle, //IN
|
||||
void **MemoryAddress, //OUT
|
||||
HSAuint64 *SizeInBytes, //OUT
|
||||
HSAuint64 NumberOfNodes, //IN
|
||||
HSAuint32* NodeArray //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Unregisters with KFD a memory buffer
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDeregisterMemoryCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void* MemoryAddress //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Ensures that the memory is resident and can be accessed by GPU
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtMapMemoryToGPUCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void* MemoryAddress, //IN (page-aligned)
|
||||
HSAuint64 MemorySizeInBytes, //IN (page-aligned)
|
||||
HSAuint64* AlternateVAGPU //OUT (page-aligned)
|
||||
);
|
||||
|
||||
/**
|
||||
Ensures that the memory is resident and can be accessed by GPUs
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtMapMemoryToGPUNodesCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void* MemoryAddress, //IN (page-aligned)
|
||||
HSAuint64 MemorySizeInBytes, //IN (page-aligned)
|
||||
HSAuint64* AlternateVAGPU, //OUT (page-aligned)
|
||||
HsaMemMapFlags MemMapFlags, //IN
|
||||
HSAuint64 NumberOfNodes, //IN
|
||||
HSAuint32* NodeArray //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Releases the residency of the memory
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtUnmapMemoryToGPUCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void* MemoryAddress //IN (page-aligned)
|
||||
);
|
||||
|
||||
/**
|
||||
Stub for Unmap Graphic Handle
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtUnmapGraphicHandleCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint64 FlatMemoryAddress, //IN
|
||||
HSAuint64 SizeInBytes //IN
|
||||
);
|
||||
|
||||
/**
|
||||
* Get an AMDGPU device handle for a GPU node
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetAMDGPUDeviceHandleCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HsaAMDGPUDeviceHandle *DeviceHandle //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Sets trap handler and trap buffer to be used for all queues
|
||||
associated with the specified NodeId within this process context
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSetTrapHandlerCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
void* TrapHandlerBaseAddress, //IN
|
||||
HSAuint64 TrapHandlerSizeInBytes, //IN
|
||||
void* TrapBufferBaseAddress, //IN
|
||||
HSAuint64 TrapBufferSizeInBytes //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Gets image tile configuration.
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetTileConfigCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN
|
||||
HsaGpuTileConfig *config //IN/OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Returns information about pointers
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtQueryPointerInfoCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
const void *Pointer, //IN
|
||||
HsaPointerInfo *PointerInfo //OUT
|
||||
);
|
||||
|
||||
/**
|
||||
Associates user data with a memory allocation
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSetMemoryUserDataCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
const void * Pointer, //IN
|
||||
void * UserData //IN
|
||||
);
|
||||
|
||||
/**
|
||||
Allocate GWS resource for a queue
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtAllocQueueGWSCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSA_QUEUEID QueueId, //IN
|
||||
HSAuint32 nGWS, //IN
|
||||
HSAuint32 *firstGWS //OUT
|
||||
);
|
||||
|
||||
/* Helper functions for calling KFD SVM ioctl */
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSVMSetAttrCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void *start_addr, //IN: Start of the virtual address range (page-aligned)
|
||||
HSAuint64 size, //IN: size (page-aligned)
|
||||
unsigned int nattr, //IN: number of attributes
|
||||
HSA_SVM_ATTRIBUTE *attrs //IN: array of attributes
|
||||
);
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSVMGetAttrCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void *start_addr, //IN: Start of the virtual address range (page-aligned)
|
||||
HSAuint64 size, //IN: size (page aligned)
|
||||
unsigned int nattr, //IN: number of attributes
|
||||
HSA_SVM_ATTRIBUTE *attrs //IN/OUT: array of attributes
|
||||
);
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSetXNACKModeCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAint32 enable //IN: enable/disable XNACK mode.
|
||||
);
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetXNACKModeCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAint32 *enable //OUT: returns XNACK value.
|
||||
);
|
||||
|
||||
/**
|
||||
Open anonymous file handle to enable events and read SMI events.
|
||||
|
||||
To enable events, write 64bit events mask to fd, event enums as bit index.
|
||||
For example, event mask (HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_INDEX_MAX) - 1) enables all events.
|
||||
|
||||
Read event from fd is not blocking, use poll with timeout value to check if event is available.
|
||||
Event is dropped if kernel event fifo is full.
|
||||
*/
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtOpenSMICtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
HSAuint32 NodeId, //IN: GPU node_id to receive the SMI event from
|
||||
int *fd //OUT: anonymous file handle
|
||||
);
|
||||
|
||||
/**
|
||||
If this is GPU Mapped memory, remap the first page at this address to be normal system memory
|
||||
|
||||
This is used in ASAN mode to remap the first page of device memory to share host ASAN logic.
|
||||
This function is only supported when libhsakmt is compiled in ASAN mode.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtReplaceAsanHeaderPageCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void *addr //IN: Start of the virtual address page
|
||||
);
|
||||
|
||||
/**
|
||||
If this is GPU Mapped memory, remap the first page back to the original GPU memory
|
||||
|
||||
This is used in ASAN mode to remap the first page back to its original mapping.
|
||||
This function is only supported when libhsakmt is compiled in ASAN mode.
|
||||
*/
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtReturnAsanHeaderPageCtx(
|
||||
HsaKFDContext *ctx, //IN
|
||||
void *addr //IN: Start of the virtual address page
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} //extern "C"
|
||||
#endif
|
||||
|
||||
#endif //_HSAKMTCTX_H_
|
||||
@@ -145,8 +145,8 @@ void model_init_env_vars(void)
|
||||
abort();
|
||||
#endif
|
||||
}
|
||||
assert(hsakmt_kfd_fd < 0);
|
||||
hsakmt_kfd_fd = fd;
|
||||
assert(hsakmt_primary_kfd_ctx.fd < 0);
|
||||
hsakmt_kfdcontext_init_context(fd, &hsakmt_primary_kfd_ctx);
|
||||
pthread_condattr_t condattr;
|
||||
pthread_condattr_init(&condattr);
|
||||
pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
|
||||
@@ -193,7 +193,7 @@ static uint64_t allocate_from_memfd(uint64_t size, uint64_t align)
|
||||
model_memfd_size = (model_memfd_size + align - 1) & ~(align - 1);
|
||||
uint64_t offset = model_memfd_size;
|
||||
model_memfd_size += size;
|
||||
int ret = ftruncate(hsakmt_kfd_fd, model_memfd_size);
|
||||
int ret = ftruncate(hsakmt_primary_kfd_ctx.fd, model_memfd_size);
|
||||
if (ret < 0)
|
||||
{
|
||||
fprintf(stderr, "model: ftruncate on memfd failed\n");
|
||||
@@ -269,7 +269,7 @@ void model_init(void)
|
||||
HSAKMT_STATUS result;
|
||||
HsaSystemProperties props;
|
||||
/* Read the topology to determine nodes. */
|
||||
result = hsakmt_topology_sysfs_get_system_props(&props);
|
||||
result = hsakmt_topology_sysfs_get_system_props(&hsakmt_primary_kfd_ctx, &props);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "model: Failed to parse topology\n");
|
||||
@@ -503,7 +503,7 @@ static int model_kfd_ioctl_locked(unsigned long request, void *arg)
|
||||
// unclear whether the current implementation causes kernel data
|
||||
// structures to grow. But in practice, it almost certainly never
|
||||
// matters.
|
||||
int ret = fallocate(hsakmt_kfd_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
||||
int ret = fallocate(hsakmt_primary_kfd_ctx.fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
||||
mem_data->file_offset, mem_data->size);
|
||||
if (ret != 0)
|
||||
{
|
||||
@@ -539,7 +539,7 @@ static int model_kfd_ioctl_locked(unsigned long request, void *arg)
|
||||
pr_debug("MODEL IOCTL: AMDKFD_IOC_MAP_MEMORY_TO_GPU: VA: %lx : Size: %lu, Flags: %x\n", mem_data->va_addr, mem_data->size, mem_data->flags);
|
||||
void *ret = mmap(VOID_PTR_ADD(model_nodes[node_id].aperture, mem_data->va_addr),
|
||||
mem_data->size, prot,
|
||||
MAP_SHARED | MAP_FIXED, hsakmt_kfd_fd, mem_data->file_offset);
|
||||
MAP_SHARED | MAP_FIXED, hsakmt_primary_kfd_ctx.fd, mem_data->file_offset);
|
||||
if (ret == MAP_FAILED)
|
||||
{
|
||||
fprintf(stderr, "model: mmap failed\n");
|
||||
@@ -767,7 +767,7 @@ static int model_kfd_ioctl_locked(unsigned long request, void *arg)
|
||||
model_functions->register_queue(model_nodes[node_id].model, &info);
|
||||
model_queues[queue_id].node_id = node_id;
|
||||
args->queue_id = queue_id;
|
||||
// Note that strictly speaking, this is the offset into the hsakmt_kfd_fd
|
||||
// Note that strictly speaking, this is the offset into the hsakmt_primary_kfd_ctx.fd
|
||||
// file, not the DRM fd (but they are the same in our case).
|
||||
args->doorbell_offset = model_nodes[node_id].doorbell_offset + 8 * queue_id;
|
||||
return 0;
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright © 2025 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including
|
||||
* the next paragraph) shall be included in all copies or substantial
|
||||
* portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kfdcontext.h"
|
||||
#include "libhsakmt.h"
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
|
||||
/*
 * Bind an already-open file descriptor to a caller-provided context.
 *
 * fd:  descriptor stored in the context; asserted to be non-negative.
 * ctx: storage to fill in; asserted to be non-NULL.
 *
 * The queue, FMM and event sub-context pointers all start out as NULL.
 */
void hsakmt_kfdcontext_init_context(int fd, HsaKFDContext *ctx)
{
	assert(fd >= 0);
	assert(ctx);

	/* Set every field in one assignment so none is left uninitialized. */
	*ctx = (HsaKFDContext){
		.fd = fd,
		.queue_context = NULL,
		.fmm_context = NULL,
		.event_context = NULL,
	};
}
|
||||
|
||||
/*
 * Tear down the state held by a KFD context.
 *
 * A NULL ctx is accepted and ignored. Otherwise each sub-context pointer is
 * handed to free() and reset to NULL, and fd is set to -1. The descriptor
 * itself is not closed by this function.
 */
void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx)
{
	if (ctx == NULL)
		return;

	/* free(NULL) is a no-op, so sub-contexts that were never set need no guard. */
	free(ctx->queue_context);
	ctx->queue_context = NULL;

	free(ctx->fmm_context);
	ctx->fmm_context = NULL;

	free(ctx->event_context);
	ctx->event_context = NULL;

	ctx->fd = -1;
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright © 2025 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including
|
||||
* the next paragraph) shall be included in all copies or substantial
|
||||
* portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _KFDCONTEXT_H_
|
||||
#define _KFDCONTEXT_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
struct hsa_kfd_queue_context;
|
||||
struct hsa_kfd_fmm_context;
|
||||
struct hsa_kfd_event_context;
|
||||
|
||||
/*
|
||||
* HsaKFDContext
|
||||
*
|
||||
* Represents the execution context for a connection to the Kernel Fusion Driver (KFD).
|
||||
*
|
||||
* This structure encapsulates all state required to manage a KFD session, including:
|
||||
* - The file descriptor associated with the open KFD device
|
||||
* - Related resources tied to this file descriptor
|
||||
*
|
||||
* Multiple HsaKFDContext instances can coexist simultaneously, each maintaining its own
|
||||
* independent set of resources. These contexts are fully isolated from one another and
|
||||
* must not have their resources mixed. For example, memory resources created in
|
||||
* context A cannot be used in context B directly. If resources need to be shared between
|
||||
* contexts, they must be explicitly exported and imported using the appropriate APIs.
|
||||
*/
|
||||
typedef struct _HsaKFDContext
{
	/* Descriptor of the open KFD device this context is bound to */
	int fd;

	/* User-queue bookkeeping belonging to this context */
	struct hsa_kfd_queue_context *queue_context;

	/* Memory-management (FMM) state belonging to this context */
	struct hsa_kfd_fmm_context *fmm_context;

	/* Event bookkeeping belonging to this context */
	struct hsa_kfd_event_context *event_context;
} HsaKFDContext;
|
||||
|
||||
// Initialize a pre-allocated HsaKFDContext with the given file descriptor
|
||||
void hsakmt_kfdcontext_init_context(int fd, HsaKFDContext *ctx);
|
||||
// Release all resources associated with the given KFD context
|
||||
void hsakmt_kfdcontext_clear_context(HsaKFDContext *ctx);
|
||||
|
||||
struct hsa_kfd_fmm_context *hsakmt_kfdcontext_get_fmm_context(HsaKFDContext *ctx);
|
||||
struct hsa_kfd_queue_context *hsakmt_kfdcontext_get_queue_context(HsaKFDContext *ctx);
|
||||
struct hsa_kfd_event_context *hsakmt_kfdcontext_get_event_context(HsaKFDContext *ctx);
|
||||
|
||||
#endif /* _KFDCONTEXT_H_ */
|
||||
@@ -28,11 +28,12 @@
|
||||
|
||||
#include "hsakmt/linux/kfd_ioctl.h"
|
||||
#include "hsakmt/hsakmt.h"
|
||||
#include "kfdcontext.h"
|
||||
#include "hsakmtctx.h"
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
|
||||
extern int hsakmt_kfd_fd;
|
||||
extern int hsakmt_udmabuf_dev_fd;
|
||||
extern unsigned long hsakmt_kfd_open_count;
|
||||
extern bool hsakmt_forked;
|
||||
@@ -42,6 +43,7 @@ extern bool hsakmt_is_svm_api_supported;
|
||||
extern int hsakmt_zfb_support;
|
||||
|
||||
extern HsaVersionInfo hsakmt_kfd_version_info;
|
||||
extern HsaKFDContext hsakmt_primary_kfd_ctx;
|
||||
|
||||
#undef HSAKMTAPI
|
||||
#define HSAKMTAPI __attribute__((visibility ("default")))
|
||||
@@ -196,7 +198,7 @@ int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id);
|
||||
HSAKMT_STATUS hsakmt_validate_nodeid_array(uint32_t **gpu_id_array,
|
||||
uint32_t NumberOfNodes, uint32_t *NodeArray);
|
||||
|
||||
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaSystemProperties *props);
|
||||
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx, HsaSystemProperties *props);
|
||||
HSAKMT_STATUS hsakmt_topology_get_node_props(HSAuint32 NodeId,
|
||||
HsaNodeProperties *NodeProperties);
|
||||
HSAKMT_STATUS hsakmt_topology_get_iolink_props(HSAuint32 NodeId,
|
||||
@@ -207,13 +209,16 @@ bool hsakmt_topology_is_svm_needed(HSA_ENGINE_ID EngineId);
|
||||
|
||||
HSAuint32 hsakmt_PageSizeFromFlags(unsigned int pageSizeFlags);
|
||||
|
||||
void* hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align,
|
||||
void* hsakmt_allocate_exec_aligned_memory_gpu(HsaKFDContext *ctx,
|
||||
uint32_t size, uint32_t align,
|
||||
uint32_t gpu_id,
|
||||
uint32_t NodeId, bool NonPaged,
|
||||
bool DeviceLocal, bool Uncached);
|
||||
void hsakmt_free_exec_aligned_memory_gpu(void *addr, uint32_t size, uint32_t align);
|
||||
HSAKMT_STATUS hsakmt_init_process_doorbells(unsigned int NumNodes);
|
||||
void hsakmt_destroy_process_doorbells(void);
|
||||
void hsakmt_free_exec_aligned_memory_gpu(HsaKFDContext *ctx,
|
||||
void *addr, uint32_t size, uint32_t align);
|
||||
HSAKMT_STATUS hsakmt_init_process_doorbells(HsaKFDContext *ctx,
|
||||
unsigned int NumNodes);
|
||||
void hsakmt_destroy_process_doorbells(HsaKFDContext *ctx);
|
||||
HSAKMT_STATUS hsakmt_init_device_debugging_memory(unsigned int NumNodes);
|
||||
void hsakmt_destroy_device_debugging_memory(void);
|
||||
bool hsakmt_debug_get_reg_status(uint32_t node_id);
|
||||
@@ -239,10 +244,10 @@ extern int hsakmt_ioctl(int fd, unsigned long request, void *arg);
|
||||
|
||||
/*
 * Evaluates to 1 when x is a non-zero power of two and to 0 otherwise.
 *
 * Every use of the argument is parenthesized so that compound expressions
 * such as (a | b) are tested as a whole instead of being re-associated with
 * the surrounding && and & operators. The argument is still expanded more
 * than once, so it must not have side effects.
 */
#define POWER_OF_2(x) (((x) && !((x) & ((x) - 1))) ? 1 : 0)
|
||||
|
||||
void hsakmt_clear_events_page(void);
|
||||
void hsakmt_fmm_clear_all_mem(void);
|
||||
void hsakmt_fmm_clear_all_aperture(void);
|
||||
void hsakmt_clear_process_doorbells(void);
|
||||
void hsakmt_clear_events_page(HsaKFDContext *ctx);
|
||||
void hsakmt_fmm_clear_all_mem(HsaKFDContext *ctx);
|
||||
void hsakmt_fmm_clear_all_aperture(HsaKFDContext *ctx);
|
||||
void hsakmt_clear_process_doorbells(HsaKFDContext *ctx);
|
||||
uint32_t hsakmt_get_num_sysfs_nodes(void);
|
||||
|
||||
bool hsakmt_is_forked_child(void);
|
||||
|
||||
@@ -34,7 +34,8 @@
|
||||
#include <fcntl.h>
|
||||
#include "fmm.h"
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicyCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 Node,
|
||||
HSAuint32 DefaultPolicy,
|
||||
HSAuint32 AlternatePolicy,
|
||||
void *MemoryAddressAlternate,
|
||||
@@ -86,7 +87,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node,
|
||||
args.alternate_aperture_base = (uintptr_t) MemoryAddressAlternate;
|
||||
args.alternate_aperture_size = MemorySizeInBytes;
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &args);
|
||||
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_MEMORY_POLICY, &args);
|
||||
|
||||
return (err == -1) ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -104,15 +105,17 @@ HSAuint32 hsakmt_PageSizeFromFlags(unsigned int pageSizeFlags)
|
||||
}
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemory(HSAuint32 PreferredNode,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 PreferredNode,
|
||||
HSAuint64 SizeInBytes,
|
||||
HsaMemFlags MemFlags,
|
||||
void **MemoryAddress)
|
||||
{
|
||||
return hsaKmtAllocMemoryAlign(PreferredNode, SizeInBytes, 0, MemFlags, MemoryAddress);
|
||||
return hsaKmtAllocMemoryAlignCtx(ctx, PreferredNode, SizeInBytes, 0, MemFlags, MemoryAddress);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlignCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 PreferredNode,
|
||||
HSAuint64 SizeInBytes,
|
||||
HSAuint64 Alignment,
|
||||
HsaMemFlags MemFlags,
|
||||
@@ -160,7 +163,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
|
||||
return HSAKMT_STATUS_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
*MemoryAddress = hsakmt_fmm_allocate_scratch(gpu_id, *MemoryAddress, SizeInBytes);
|
||||
*MemoryAddress = hsakmt_fmm_allocate_scratch(ctx, gpu_id, *MemoryAddress, SizeInBytes);
|
||||
|
||||
if (!(*MemoryAddress)) {
|
||||
pr_err("[%s] failed to allocate %lu bytes from scratch\n",
|
||||
@@ -183,7 +186,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
|
||||
if (hsakmt_zfb_support && gpu_id && MemFlags.ui32.NonPaged == 1)
|
||||
MemFlags.ui32.CoarseGrain = 1;
|
||||
|
||||
*MemoryAddress = hsakmt_fmm_allocate_host(gpu_id, MemFlags.ui32.GTTAccess ? 0 : PreferredNode,
|
||||
*MemoryAddress = hsakmt_fmm_allocate_host(ctx, gpu_id, MemFlags.ui32.GTTAccess ? 0 : PreferredNode,
|
||||
*MemoryAddress, SizeInBytes, Alignment, MemFlags);
|
||||
|
||||
if (!(*MemoryAddress)) {
|
||||
@@ -204,7 +207,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
*MemoryAddress = hsakmt_fmm_allocate_device(gpu_id, PreferredNode, *MemoryAddress,
|
||||
*MemoryAddress = hsakmt_fmm_allocate_device(ctx, gpu_id, PreferredNode, *MemoryAddress,
|
||||
SizeInBytes, Alignment, MemFlags);
|
||||
|
||||
if (!(*MemoryAddress)) {
|
||||
@@ -218,7 +221,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
|
||||
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemoryCtx(HsaKFDContext *ctx,
|
||||
void *MemoryAddress,
|
||||
HSAuint64 SizeInBytes)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
@@ -230,11 +234,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress,
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return hsakmt_fmm_release(MemoryAddress);
|
||||
return hsakmt_fmm_release(ctx, MemoryAddress);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemory(HSAuint32 Node,
|
||||
HSAuint64 *AvailableBytes)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemoryCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 Node,
|
||||
HSAuint64 *AvailableBytes)
|
||||
{
|
||||
struct kfd_ioctl_get_available_memory_args args = {};
|
||||
HSAKMT_STATUS result;
|
||||
@@ -250,14 +255,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemory(HSAuint32 Node,
|
||||
return result;
|
||||
}
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args))
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
*AvailableBytes = args.available;
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryCtx(HsaKFDContext *ctx,
|
||||
void *MemoryAddress,
|
||||
HSAuint64 MemorySizeInBytes)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
@@ -271,11 +277,13 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress,
|
||||
HsaMemFlags flags;
|
||||
flags.ui32.CoarseGrain = 1;
|
||||
flags.ui32.ExtendedCoherent = 0;
|
||||
return hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
|
||||
return hsakmt_fmm_register_memory(ctx,
|
||||
MemoryAddress, MemorySizeInBytes,
|
||||
NULL, 0, flags);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodesCtx(HsaKFDContext *ctx,
|
||||
void *MemoryAddress,
|
||||
HSAuint64 MemorySizeInBytes,
|
||||
HSAuint64 NumberOfNodes,
|
||||
HSAuint32 *NodeArray)
|
||||
@@ -299,7 +307,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
|
||||
flags.ui32.CoarseGrain = 1;
|
||||
flags.ui32.ExtendedCoherent = 0;
|
||||
|
||||
ret = hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
|
||||
ret = hsakmt_fmm_register_memory(ctx,
|
||||
MemoryAddress, MemorySizeInBytes,
|
||||
gpu_id_array,
|
||||
NumberOfNodes*sizeof(uint32_t),
|
||||
flags);
|
||||
@@ -310,7 +319,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
|
||||
return ret;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags(void *MemoryAddress,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlagsCtx(HsaKFDContext *ctx,
|
||||
void *MemoryAddress,
|
||||
HSAuint64 MemorySizeInBytes,
|
||||
HsaMemFlags MemFlags)
|
||||
{
|
||||
@@ -331,21 +341,24 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags(void *MemoryAddress,
|
||||
/* TODO: support mixed APU and dGPU configurations */
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
|
||||
ret = hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
|
||||
ret = hsakmt_fmm_register_memory(ctx,
|
||||
MemoryAddress, MemorySizeInBytes,
|
||||
NULL, 0, MemFlags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsResourceHandle,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesCtx(HsaKFDContext *ctx,
|
||||
HSAuint64 GraphicsResourceHandle,
|
||||
HsaGraphicsResourceInfo *GraphicsResourceInfo,
|
||||
HSAuint64 NumberOfNodes,
|
||||
HSAuint32 *NodeArray)
|
||||
{
|
||||
HSA_REGISTER_MEM_FLAGS regFlags;
|
||||
regFlags.Value = 0;
|
||||
|
||||
return hsaKmtRegisterGraphicsHandleToNodesExt(GraphicsResourceHandle,
|
||||
|
||||
return hsaKmtRegisterGraphicsHandleToNodesExtCtx(ctx,
|
||||
GraphicsResourceHandle,
|
||||
GraphicsResourceInfo,
|
||||
NumberOfNodes,
|
||||
NodeArray,
|
||||
@@ -353,7 +366,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsRe
|
||||
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 GraphicsResourceHandle,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExtCtx(HsaKFDContext *ctx,
|
||||
HSAuint64 GraphicsResourceHandle,
|
||||
HsaGraphicsResourceInfo *GraphicsResourceInfo,
|
||||
HSAuint64 NumberOfNodes,
|
||||
HSAuint32 *NodeArray,
|
||||
@@ -371,7 +385,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 Graphic
|
||||
}
|
||||
|
||||
if (ret == HSAKMT_STATUS_SUCCESS) {
|
||||
ret = hsakmt_fmm_register_graphics_handle(
|
||||
ret = hsakmt_fmm_register_graphics_handle(ctx,
|
||||
GraphicsResourceHandle, GraphicsResourceInfo,
|
||||
gpu_id_array, NumberOfNodes * sizeof(uint32_t), RegisterFlags);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
@@ -381,7 +395,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 Graphic
|
||||
return ret;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandleCtx(HsaKFDContext *ctx,
|
||||
void *MemoryAddress,
|
||||
HSAuint64 MemorySizeInBytes,
|
||||
int *DMABufFd,
|
||||
HSAuint64 *Offset)
|
||||
@@ -391,11 +406,13 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress,
|
||||
|
||||
pr_debug("[%s] address %p\n", __func__, MemoryAddress);
|
||||
|
||||
return hsakmt_fmm_export_dma_buf_fd(MemoryAddress, MemorySizeInBytes,
|
||||
return hsakmt_fmm_export_dma_buf_fd(ctx,
|
||||
MemoryAddress, MemorySizeInBytes,
|
||||
DMABufFd, Offset);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemoryCtx(HsaKFDContext *ctx,
|
||||
void *MemoryAddress,
|
||||
HSAuint64 SizeInBytes,
|
||||
HsaSharedMemoryHandle *SharedMemoryHandle)
|
||||
{
|
||||
@@ -406,25 +423,28 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress,
|
||||
if (!SharedMemoryHandle)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
return hsakmt_fmm_share_memory(MemoryAddress, SizeInBytes, SharedMemoryHandle);
|
||||
return hsakmt_fmm_share_memory(ctx, MemoryAddress, SizeInBytes, SharedMemoryHandle);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandle(const HsaSharedMemoryHandle *SharedMemoryHandle,
|
||||
void **MemoryAddress,
|
||||
HSAuint64 *SizeInBytes)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleCtx(HsaKFDContext *ctx,
|
||||
const HsaSharedMemoryHandle *SharedMemoryHandle,
|
||||
void **MemoryAddress,
|
||||
HSAuint64 *SizeInBytes)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
pr_debug("[%s] handle %p\n", __func__, SharedMemoryHandle);
|
||||
|
||||
return hsaKmtRegisterSharedHandleToNodes(SharedMemoryHandle,
|
||||
return hsaKmtRegisterSharedHandleToNodesCtx(ctx,
|
||||
SharedMemoryHandle,
|
||||
MemoryAddress,
|
||||
SizeInBytes,
|
||||
0,
|
||||
NULL);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodes(const HsaSharedMemoryHandle *SharedMemoryHandle,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodesCtx(HsaKFDContext *ctx,
|
||||
const HsaSharedMemoryHandle *SharedMemoryHandle,
|
||||
void **MemoryAddress,
|
||||
HSAuint64 *SizeInBytes,
|
||||
HSAuint64 NumberOfNodes,
|
||||
@@ -447,7 +467,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodes(const HsaSharedMemoryH
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = hsakmt_fmm_register_shared_memory(SharedMemoryHandle,
|
||||
ret = hsakmt_fmm_register_shared_memory(ctx,
|
||||
SharedMemoryHandle,
|
||||
SizeInBytes,
|
||||
MemoryAddress,
|
||||
gpu_id_array,
|
||||
@@ -487,17 +508,17 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtProcessVMWrite(HSAuint32 Pid,
|
||||
return HSAKMT_STATUS_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemory(void *MemoryAddress)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemoryCtx(HsaKFDContext *ctx, void *MemoryAddress)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
pr_debug("[%s] address %p\n", __func__, MemoryAddress);
|
||||
|
||||
return hsakmt_fmm_deregister_memory(MemoryAddress);
|
||||
return hsakmt_fmm_deregister_memory(ctx, MemoryAddress);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(void *MemoryAddress,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUCtx(HsaKFDContext *ctx,
|
||||
void *MemoryAddress,
|
||||
HSAuint64 MemorySizeInBytes,
|
||||
HSAuint64 *AlternateVAGPU)
|
||||
{
|
||||
@@ -513,10 +534,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(void *MemoryAddress,
|
||||
if (AlternateVAGPU)
|
||||
*AlternateVAGPU = 0;
|
||||
|
||||
return hsakmt_fmm_map_to_gpu(MemoryAddress, MemorySizeInBytes, AlternateVAGPU);
|
||||
return hsakmt_fmm_map_to_gpu(ctx, MemoryAddress, MemorySizeInBytes, AlternateVAGPU);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(void *MemoryAddress,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodesCtx(HsaKFDContext *ctx,
|
||||
void *MemoryAddress,
|
||||
HSAuint64 MemorySizeInBytes,
|
||||
HSAuint64 *AlternateVAGPU,
|
||||
HsaMemMapFlags MemMapFlags,
|
||||
@@ -537,16 +559,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(void *MemoryAddress,
|
||||
}
|
||||
|
||||
if (!hsakmt_is_dgpu && NumberOfNodes == 1)
|
||||
return hsaKmtMapMemoryToGPU(MemoryAddress,
|
||||
MemorySizeInBytes,
|
||||
AlternateVAGPU);
|
||||
return hsaKmtMapMemoryToGPUCtx(ctx, MemoryAddress,
|
||||
MemorySizeInBytes, AlternateVAGPU);
|
||||
|
||||
ret = hsakmt_validate_nodeid_array(&gpu_id_array,
|
||||
NumberOfNodes, NodeArray);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
return ret;
|
||||
|
||||
ret = hsakmt_fmm_map_to_gpu_nodes(MemoryAddress, MemorySizeInBytes,
|
||||
ret = hsakmt_fmm_map_to_gpu_nodes(ctx, MemoryAddress, MemorySizeInBytes,
|
||||
gpu_id_array, NumberOfNodes, AlternateVAGPU);
|
||||
|
||||
if (gpu_id_array)
|
||||
@@ -555,7 +576,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(void *MemoryAddress,
|
||||
return ret;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(void *MemoryAddress)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPUCtx(HsaKFDContext *ctx, void *MemoryAddress)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
@@ -567,7 +588,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(void *MemoryAddress)
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
if (!hsakmt_fmm_unmap_from_gpu(MemoryAddress))
|
||||
if (!hsakmt_fmm_unmap_from_gpu(ctx, MemoryAddress))
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
else
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
@@ -588,16 +609,16 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtMapGraphicHandle(HSAuint32 NodeId,
|
||||
return HSAKMT_STATUS_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapGraphicHandle(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapGraphicHandleCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSAuint64 FlatMemoryAddress,
|
||||
HSAuint64 SizeInBytes)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
return hsaKmtUnmapMemoryToGPU(PORT_UINT64_TO_VPTR(FlatMemoryAddress));
|
||||
return hsaKmtUnmapMemoryToGPUCtx(ctx, PORT_UINT64_TO_VPTR(FlatMemoryAddress));
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *config)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfigCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId, HsaGpuTileConfig *config)
|
||||
{
|
||||
struct kfd_ioctl_get_tile_config_args args = {0};
|
||||
uint32_t gpu_id;
|
||||
@@ -623,7 +644,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *
|
||||
args.num_tile_configs = config->NumTileConfigs;
|
||||
args.num_macro_tile_configs = config->NumMacroTileConfigs;
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_TILE_CONFIG, &args) != 0)
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_GET_TILE_CONFIG, &args) != 0)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
config->NumTileConfigs = args.num_tile_configs;
|
||||
@@ -637,7 +658,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfo(const void *Pointer,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfoCtx(HsaKFDContext *ctx,
|
||||
const void *Pointer,
|
||||
HsaPointerInfo *PointerInfo)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
@@ -646,47 +668,264 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfo(const void *Pointer,
|
||||
|
||||
if (!PointerInfo)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
return hsakmt_fmm_get_mem_info(Pointer, PointerInfo);
|
||||
return hsakmt_fmm_get_mem_info(ctx, Pointer, PointerInfo);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryUserData(const void *Pointer,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryUserDataCtx(HsaKFDContext *ctx,
|
||||
const void *Pointer,
|
||||
void *UserData)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
pr_debug("[%s] pointer %p\n", __func__, Pointer);
|
||||
|
||||
return hsakmt_fmm_set_mem_user_data(Pointer, UserData);
|
||||
return hsakmt_fmm_set_mem_user_data(ctx, Pointer, UserData);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtReplaceAsanHeaderPage(void *addr)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtReplaceAsanHeaderPageCtx(HsaKFDContext *ctx, void *addr)
|
||||
{
|
||||
#ifdef SANITIZER_AMDGPU
|
||||
pr_debug("[%s] address %p\n", __func__, addr);
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
return hsakmt_fmm_replace_asan_header_page(addr);
|
||||
return hsakmt_fmm_replace_asan_header_page(ctx, addr);
|
||||
#else
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
#endif
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPage(void *addr)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPageCtx(HsaKFDContext *ctx, void *addr)
|
||||
{
|
||||
#ifdef SANITIZER_AMDGPU
|
||||
pr_debug("[%s] address %p\n", __func__, addr);
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
return hsakmt_fmm_return_asan_header_page(addr);
|
||||
return hsakmt_fmm_return_asan_header_page(ctx, addr);
|
||||
#else
|
||||
return HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
#endif
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle( HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandleCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HsaAMDGPUDeviceHandle *DeviceHandle)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
return hsakmt_fmm_get_amdgpu_device_handle(NodeId, DeviceHandle);
|
||||
return hsakmt_fmm_get_amdgpu_device_handle(ctx, NodeId, DeviceHandle);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Legacy (context-less) entry point: sets the memory policy by forwarding
 * to hsaKmtSetMemoryPolicyCtx() on the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node,
					      HSAuint32 DefaultPolicy,
					      HSAuint32 AlternatePolicy,
					      void *MemoryAddressAlternate,
					      HSAuint64 MemorySizeInBytes)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtSetMemoryPolicyCtx(primary,
					Node,
					DefaultPolicy,
					AlternatePolicy,
					MemoryAddressAlternate,
					MemorySizeInBytes);
}
|
||||
|
||||
/*
 * Legacy (context-less) allocation entry point.
 *
 * Forwards to hsaKmtAllocMemoryCtx() on the primary KFD context, matching
 * the other legacy wrappers in this file. The Ctx variant in turn requests
 * the allocation with an alignment of 0, which is what this wrapper did
 * before by going through hsaKmtAllocMemoryAlign().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemory(HSAuint32 PreferredNode,
					  HSAuint64 SizeInBytes,
					  HsaMemFlags MemFlags,
					  void **MemoryAddress)
{
	return hsaKmtAllocMemoryCtx(&hsakmt_primary_kfd_ctx, PreferredNode,
				    SizeInBytes, MemFlags, MemoryAddress);
}
|
||||
|
||||
/*
 * Legacy (context-less) aligned allocation: delegates to
 * hsaKmtAllocMemoryAlignCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode,
					       HSAuint64 SizeInBytes,
					       HSAuint64 Alignment,
					       HsaMemFlags MemFlags,
					       void **MemoryAddress)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtAllocMemoryAlignCtx(primary,
					 PreferredNode,
					 SizeInBytes,
					 Alignment,
					 MemFlags,
					 MemoryAddress);
}
|
||||
|
||||
/*
 * Legacy (context-less) free: delegates to hsaKmtFreeMemoryCtx() using the
 * primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress,
					 HSAuint64 SizeInBytes)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtFreeMemoryCtx(primary, MemoryAddress, SizeInBytes);
}
|
||||
|
||||
/*
 * Legacy (context-less) query of available memory on a node: delegates to
 * hsaKmtAvailableMemoryCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemory(HSAuint32 Node,
					      HSAuint64 *AvailableBytes)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtAvailableMemoryCtx(primary, Node, AvailableBytes);
}
|
||||
|
||||
/*
 * Legacy (context-less) memory registration: delegates to
 * hsaKmtRegisterMemoryCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress,
					     HSAuint64 MemorySizeInBytes)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRegisterMemoryCtx(primary, MemoryAddress,
				       MemorySizeInBytes);
}
|
||||
|
||||
/*
 * Legacy (context-less) registration of memory to a node list: delegates to
 * hsaKmtRegisterMemoryToNodesCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
						    HSAuint64 MemorySizeInBytes,
						    HSAuint64 NumberOfNodes,
						    HSAuint32 *NodeArray)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRegisterMemoryToNodesCtx(primary,
					      MemoryAddress,
					      MemorySizeInBytes,
					      NumberOfNodes,
					      NodeArray);
}
|
||||
|
||||
/*
 * Legacy (context-less) registration with caller-supplied flags: delegates
 * to hsaKmtRegisterMemoryWithFlagsCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags(void *MemoryAddress,
						      HSAuint64 MemorySizeInBytes,
						      HsaMemFlags MemFlags)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRegisterMemoryWithFlagsCtx(primary,
						MemoryAddress,
						MemorySizeInBytes,
						MemFlags);
}
|
||||
|
||||
/*
 * Legacy (context-less) graphics-handle registration.
 *
 * Forwards to hsaKmtRegisterGraphicsHandleToNodesCtx() on the primary KFD
 * context. That variant zeroes the register flags and calls the Ext
 * variant, so the zero-flag setup is no longer duplicated here.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsResourceHandle,
							    HsaGraphicsResourceInfo *GraphicsResourceInfo,
							    HSAuint64 NumberOfNodes,
							    HSAuint32 *NodeArray)
{
	return hsaKmtRegisterGraphicsHandleToNodesCtx(&hsakmt_primary_kfd_ctx,
						      GraphicsResourceHandle,
						      GraphicsResourceInfo,
						      NumberOfNodes,
						      NodeArray);
}
|
||||
|
||||
/*
 * Legacy (context-less) graphics-handle registration with explicit register
 * flags: delegates to hsaKmtRegisterGraphicsHandleToNodesExtCtx() using the
 * primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 GraphicsResourceHandle,
							       HsaGraphicsResourceInfo *GraphicsResourceInfo,
							       HSAuint64 NumberOfNodes,
							       HSAuint32 *NodeArray,
							       HSA_REGISTER_MEM_FLAGS RegisterFlags)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRegisterGraphicsHandleToNodesExtCtx(primary,
							 GraphicsResourceHandle,
							 GraphicsResourceInfo,
							 NumberOfNodes, NodeArray,
							 RegisterFlags);
}
|
||||
|
||||
/*
 * Legacy (context-less) DMA-buf export: delegates to
 * hsaKmtExportDMABufHandleCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress,
						 HSAuint64 MemorySizeInBytes,
						 int *DMABufFd,
						 HSAuint64 *Offset)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtExportDMABufHandleCtx(primary,
					   MemoryAddress,
					   MemorySizeInBytes,
					   DMABufFd,
					   Offset);
}
|
||||
|
||||
/*
 * Legacy (context-less) memory sharing: delegates to hsaKmtShareMemoryCtx()
 * using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress,
					  HSAuint64 SizeInBytes,
					  HsaSharedMemoryHandle *SharedMemoryHandle)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtShareMemoryCtx(primary,
				    MemoryAddress,
				    SizeInBytes,
				    SharedMemoryHandle);
}
|
||||
|
||||
/*
 * Legacy (context-less) import of a shared memory handle.
 *
 * Forwards to hsaKmtRegisterSharedHandleCtx() on the primary KFD context.
 * That variant performs the KFD-open check, emits the debug trace and
 * registers the handle with an empty node list, so those steps are no
 * longer duplicated here. The debug trace now reports the Ctx function
 * name through __func__.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandle(
		const HsaSharedMemoryHandle *SharedMemoryHandle,
		void **MemoryAddress,
		HSAuint64 *SizeInBytes)
{
	return hsaKmtRegisterSharedHandleCtx(&hsakmt_primary_kfd_ctx,
					     SharedMemoryHandle,
					     MemoryAddress,
					     SizeInBytes);
}
|
||||
|
||||
/*
 * Legacy (context-less) import of a shared memory handle for a node list:
 * delegates to hsaKmtRegisterSharedHandleToNodesCtx() using the primary KFD
 * context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodes(const HsaSharedMemoryHandle *SharedMemoryHandle,
							  void **MemoryAddress,
							  HSAuint64 *SizeInBytes,
							  HSAuint64 NumberOfNodes,
							  HSAuint32 *NodeArray)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtRegisterSharedHandleToNodesCtx(primary,
						    SharedMemoryHandle,
						    MemoryAddress, SizeInBytes,
						    NumberOfNodes, NodeArray);
}
|
||||
|
||||
/*
 * Legacy (context-less) deregistration: delegates to
 * hsaKmtDeregisterMemoryCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemory(void *MemoryAddress)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtDeregisterMemoryCtx(primary, MemoryAddress);
}
|
||||
|
||||
/*
 * Legacy (context-less) GPU mapping: delegates to hsaKmtMapMemoryToGPUCtx()
 * using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(void *MemoryAddress,
					     HSAuint64 MemorySizeInBytes,
					     HSAuint64 *AlternateVAGPU)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtMapMemoryToGPUCtx(primary,
				       MemoryAddress,
				       MemorySizeInBytes,
				       AlternateVAGPU);
}
|
||||
|
||||
/*
 * Legacy (context-less) GPU mapping for an explicit node list: delegates to
 * hsaKmtMapMemoryToGPUNodesCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(void *MemoryAddress,
						  HSAuint64 MemorySizeInBytes,
						  HSAuint64 *AlternateVAGPU,
						  HsaMemMapFlags MemMapFlags,
						  HSAuint64 NumberOfNodes,
						  HSAuint32 *NodeArray)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtMapMemoryToGPUNodesCtx(primary,
					    MemoryAddress,
					    MemorySizeInBytes,
					    AlternateVAGPU,
					    MemMapFlags,
					    NumberOfNodes,
					    NodeArray);
}
|
||||
|
||||
/*
 * Legacy (context-less) GPU unmapping: delegates to
 * hsaKmtUnmapMemoryToGPUCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(void *MemoryAddress)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtUnmapMemoryToGPUCtx(primary, MemoryAddress);
}
|
||||
|
||||
/*
 * Legacy (context-less) graphics-handle unmapping.
 *
 * Forwards all arguments to hsaKmtUnmapGraphicHandleCtx() on the primary
 * KFD context, matching the other legacy wrappers. The Ctx variant performs
 * the KFD-open check and unmaps FlatMemoryAddress from the GPU; NodeId and
 * SizeInBytes are passed through so they are no longer unused here.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapGraphicHandle(HSAuint32 NodeId,
						 HSAuint64 FlatMemoryAddress,
						 HSAuint64 SizeInBytes)
{
	return hsaKmtUnmapGraphicHandleCtx(&hsakmt_primary_kfd_ctx, NodeId,
					   FlatMemoryAddress, SizeInBytes);
}
|
||||
|
||||
/*
 * Legacy (context-less) tile-config query: delegates to
 * hsaKmtGetTileConfigCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId,
					    HsaGpuTileConfig *config)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtGetTileConfigCtx(primary, NodeId, config);
}
|
||||
|
||||
/*
 * Legacy (context-less) pointer-info query: delegates to
 * hsaKmtQueryPointerInfoCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfo(const void *Pointer,
					       HsaPointerInfo *PointerInfo)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtQueryPointerInfoCtx(primary, Pointer, PointerInfo);
}
|
||||
|
||||
/*
 * Legacy (context-less) user-data attachment: delegates to
 * hsaKmtSetMemoryUserDataCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryUserData(const void *Pointer,
						void *UserData)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtSetMemoryUserDataCtx(primary, Pointer, UserData);
}
|
||||
|
||||
/*
 * Legacy (context-less) ASAN header-page replacement: delegates to
 * hsaKmtReplaceAsanHeaderPageCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReplaceAsanHeaderPage(void *addr)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtReplaceAsanHeaderPageCtx(primary, addr);
}
|
||||
|
||||
/*
 * Legacy (context-less) ASAN header-page return: delegates to
 * hsaKmtReturnAsanHeaderPageCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPage(void *addr)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtReturnAsanHeaderPageCtx(primary, addr);
}
|
||||
|
||||
/*
 * Legacy (context-less) lookup of the amdgpu device handle for a node.
 *
 * Runs the KFD-open check and then delegates to
 * hsaKmtGetAMDGPUDeviceHandleCtx() using the primary KFD context.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle(HSAuint32 NodeId,
						    HsaAMDGPUDeviceHandle *DeviceHandle)
{
	CHECK_KFD_OPEN();

	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtGetAMDGPUDeviceHandleCtx(primary, NodeId, DeviceHandle);
}
|
||||
|
||||
@@ -51,6 +51,8 @@ static pid_t parent_pid = -1;
|
||||
int hsakmt_debug_level;
|
||||
bool hsakmt_forked;
|
||||
|
||||
HsaKFDContext hsakmt_primary_kfd_ctx = {.fd = -1};
|
||||
|
||||
/* hsakmt_is_forked_child detects when the process has forked since the last
|
||||
* time this function was called. We cannot rely on pthread_atfork
|
||||
* because the process can fork without calling the fork function in
|
||||
@@ -99,16 +101,18 @@ static void child_fork_handler(void)
|
||||
* The topology information duplicated from the parent is valid
|
||||
* in the child process so it is not cleared
|
||||
*/
|
||||
static void clear_after_fork(void)
|
||||
static void clear_after_fork(HsaKFDContext *ctx)
|
||||
{
|
||||
hsakmt_clear_process_doorbells();
|
||||
hsakmt_clear_events_page();
|
||||
hsakmt_fmm_clear_all_mem();
|
||||
hsakmt_clear_process_doorbells(ctx);
|
||||
hsakmt_clear_events_page(ctx);
|
||||
hsakmt_fmm_clear_all_mem(ctx);
|
||||
hsakmt_destroy_device_debugging_memory();
|
||||
if (hsakmt_kfd_fd) {
|
||||
close(hsakmt_kfd_fd);
|
||||
hsakmt_kfd_fd = -1;
|
||||
}
|
||||
|
||||
int fd = ctx->fd;
|
||||
if (fd >= 0) {
|
||||
hsakmt_kfdcontext_clear_context(ctx);
|
||||
close(fd);
|
||||
}
|
||||
if (hsakmt_udmabuf_dev_fd > 0) {
|
||||
close(hsakmt_udmabuf_dev_fd);
|
||||
hsakmt_udmabuf_dev_fd = -1;
|
||||
@@ -150,7 +154,7 @@ static HSAKMT_STATUS init_vars_from_env(void)
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFDCtx(HsaKFDContext **pCtx)
|
||||
{
|
||||
HSAKMT_STATUS result;
|
||||
int fd = -1;
|
||||
@@ -166,7 +170,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
|
||||
* belong to the parent
|
||||
*/
|
||||
if (hsakmt_is_forked_child())
|
||||
clear_after_fork();
|
||||
clear_after_fork(&hsakmt_primary_kfd_ctx);
|
||||
|
||||
if (hsakmt_kfd_open_count == 0) {
|
||||
static bool atfork_installed = false;
|
||||
@@ -184,15 +188,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
|
||||
// Check if we are using the hsakmtmodel and setup initial state
|
||||
model_init_env_vars();
|
||||
|
||||
if (hsakmt_kfd_fd < 0 && !hsakmt_use_model) {
|
||||
if (hsakmt_primary_kfd_ctx.fd < 0 && !hsakmt_use_model) {
|
||||
fd = open(kfd_device_name, O_RDWR | O_CLOEXEC);
|
||||
|
||||
if (fd == -1) {
|
||||
result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;
|
||||
goto open_failed;
|
||||
}
|
||||
|
||||
hsakmt_kfd_fd = fd;
|
||||
hsakmt_kfdcontext_init_context(fd, &hsakmt_primary_kfd_ctx);
|
||||
}
|
||||
|
||||
init_page_size();
|
||||
@@ -216,8 +219,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
|
||||
useSvmStr = getenv("HSA_USE_SVM");
|
||||
hsakmt_is_svm_api_supported = !(useSvmStr && !strcmp(useSvmStr, "0"));
|
||||
if(!hsakmt_use_model)
|
||||
result = hsakmt_topology_sysfs_get_system_props(&sys_props);
|
||||
|
||||
result = hsakmt_topology_sysfs_get_system_props(&hsakmt_primary_kfd_ctx, &sys_props);
|
||||
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
goto topology_sysfs_failed;
|
||||
|
||||
@@ -227,6 +230,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
|
||||
pr_warn("Insufficient Memory. Debugging unavailable\n");
|
||||
|
||||
hsakmt_init_counter_props(sys_props.NumNodes);
|
||||
*pCtx = &hsakmt_primary_kfd_ctx;
|
||||
|
||||
if (!atfork_installed) {
|
||||
/* Atfork handlers cannot be uninstalled and
|
||||
@@ -241,6 +245,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
|
||||
}
|
||||
} else {
|
||||
hsakmt_kfd_open_count++;
|
||||
*pCtx = &hsakmt_primary_kfd_ctx;
|
||||
result = HSAKMT_STATUS_KERNEL_ALREADY_OPENED;
|
||||
}
|
||||
|
||||
@@ -256,7 +261,7 @@ open_failed:
|
||||
return result;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFDCtx(void)
|
||||
{
|
||||
HSAKMT_STATUS result;
|
||||
|
||||
@@ -266,7 +271,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void)
|
||||
if (--hsakmt_kfd_open_count == 0) {
|
||||
hsakmt_destroy_counter_props();
|
||||
hsakmt_destroy_device_debugging_memory();
|
||||
hsakmt_fmm_clear_all_aperture();
|
||||
hsakmt_fmm_clear_all_aperture(&hsakmt_primary_kfd_ctx);
|
||||
}
|
||||
|
||||
result = HSAKMT_STATUS_SUCCESS;
|
||||
@@ -277,3 +282,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void)
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Legacy entry point kept for existing callers: opens KFD through
 * hsaKmtOpenKFDCtx() and returns that call's status unchanged. The context
 * pointer written back through pCtx is discarded here.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
|
||||
{
|
||||
HsaKFDContext *pCtx = NULL;
|
||||
return hsaKmtOpenKFDCtx(&pCtx);
|
||||
}
|
||||
|
||||
/* Legacy entry point kept for existing callers: forwards to
 * hsaKmtCloseKFDCtx(), which takes no context argument, and returns its
 * status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void)
|
||||
{
|
||||
return hsaKmtCloseKFDCtx();
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingQueryCapabilities(HSAuint32 NodeId, void
|
||||
args.num_sample_info = sample_info_sz;
|
||||
args.flags = 0;
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
int err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
|
||||
*size = args.num_sample_info;
|
||||
|
||||
@@ -111,7 +111,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingCreate(HSAuint32 NodeId, HsaPcSamplingIn
|
||||
args.num_sample_info = 1;
|
||||
args.trace_id = INVALID_TRACE_ID;
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
int err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
if (err) {
|
||||
switch (errno) {
|
||||
case EINVAL:
|
||||
@@ -151,7 +151,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingDestroy(HSAuint32 NodeId, HsaPcSamplingT
|
||||
args.gpu_id = gpu_id;
|
||||
args.trace_id = traceId;
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
int err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
if (err) {
|
||||
if (errno == EINVAL)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
@@ -181,7 +181,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStart(HSAuint32 NodeId, HsaPcSamplingTra
|
||||
args.gpu_id = gpu_id;
|
||||
args.trace_id = traceId;
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
int err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
if (err) {
|
||||
switch (errno) {
|
||||
case EINVAL:
|
||||
@@ -220,7 +220,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStop(HSAuint32 NodeId, HsaPcSamplingTrac
|
||||
args.gpu_id = gpu_id;
|
||||
args.trace_id = traceId;
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
int err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
if (err) {
|
||||
switch (errno) {
|
||||
case EINVAL:
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* 1024 doorbells, 4 or 8 bytes each doorbell depending on ASIC generation */
|
||||
#define DOORBELL_SIZE(gfxv) (((gfxv) >= 0x90000) ? 8 : 4)
|
||||
@@ -80,8 +81,28 @@ struct process_doorbells {
|
||||
pthread_mutex_t mutex;
|
||||
};
|
||||
|
||||
static unsigned int num_doorbells;
|
||||
static struct process_doorbells *doorbells;
|
||||
/* Queue-related state kept per KFD context. The same two names appear just
 * above as file-scope statics; this struct presumably replaces them so that
 * each context carries its own doorbell table - confirm against the full
 * patch.
 */
struct hsa_kfd_queue_context
|
||||
{
|
||||
/* Number of entries in doorbells[]; hsakmt_init_process_doorbells() sets it to NumNodes */
unsigned int num_doorbells;
|
||||
/* Doorbell table allocated by hsakmt_init_process_doorbells(); indexed by NodeId */
struct process_doorbells *doorbells;
|
||||
};
|
||||
|
||||
/* Return the queue state attached to ctx, creating it on first use.
 *
 * If ctx->queue_context is already set it is returned as is. Otherwise a
 * zero-filled struct hsa_kfd_queue_context is allocated with calloc(),
 * stored in ctx->queue_context and returned.
 *
 * ctx must not be NULL (asserted).
 * Returns NULL, after logging an error, when the allocation fails.
 *
 * NOTE(review): no locking is visible around the check-then-allocate
 * sequence - confirm that callers serialise the first call per context.
 */
struct hsa_kfd_queue_context *hsakmt_kfdcontext_get_queue_context(HsaKFDContext *ctx)
|
||||
{
|
||||
assert(ctx);
|
||||
|
||||
if (ctx->queue_context)
|
||||
return ctx->queue_context;
|
||||
|
||||
ctx->queue_context = calloc(1, sizeof(struct hsa_kfd_queue_context));
|
||||
if (!ctx->queue_context) {
|
||||
pr_err("Alloc memory failed for struct hsa_kfd_queue_context size %zu\n",
|
||||
sizeof(struct hsa_kfd_queue_context));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ctx->queue_context;
|
||||
}
|
||||
|
||||
uint32_t hsakmt_get_vgpr_size_per_cu(uint32_t gfxv)
|
||||
{
|
||||
@@ -102,26 +123,27 @@ uint32_t hsakmt_get_vgpr_size_per_cu(uint32_t gfxv)
|
||||
return vgpr_size;
|
||||
}
|
||||
|
||||
/* Allocate the doorbell table with NumNodes entries and reset every entry:
 * use_gpuvm false, size 0, mapping NULL, mutex initialised with default
 * attributes. The entry count is then recorded as NumNodes.
 *
 * This span is a rendered diff: each changed statement appears twice, first
 * in the form that uses the file-scope doorbells/num_doorbells and no ctx
 * parameter, then in the form that goes through queue_ctx.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY if the table cannot be allocated and
 * HSAKMT_STATUS_SUCCESS otherwise.
 *
 * NOTE(review): hsakmt_kfdcontext_get_queue_context() returns NULL when its
 * calloc() fails, and queue_ctx is dereferenced below without a check.
 * NOTE(review): the product NumNodes * sizeof(struct process_doorbells) is
 * not checked for overflow and the result of pthread_mutex_init() is
 * ignored.
 */
HSAKMT_STATUS hsakmt_init_process_doorbells(unsigned int NumNodes)
|
||||
HSAKMT_STATUS hsakmt_init_process_doorbells(HsaKFDContext *ctx, unsigned int NumNodes)
|
||||
{
|
||||
unsigned int i;
|
||||
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
|
||||
struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
|
||||
|
||||
/* doorbells[] is accessed using Topology NodeId. This means doorbells[0],
|
||||
/* queue_ctx->doorbells[] is accessed using Topology NodeId. This means doorbells[0],
|
||||
* which corresponds to CPU only Node, might not be used
|
||||
*/
|
||||
doorbells = malloc(NumNodes * sizeof(struct process_doorbells));
|
||||
if (!doorbells)
|
||||
queue_ctx->doorbells = malloc(NumNodes * sizeof(struct process_doorbells));
|
||||
if (!queue_ctx->doorbells)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
for (i = 0; i < NumNodes; i++) {
|
||||
doorbells[i].use_gpuvm = false;
|
||||
doorbells[i].size = 0;
|
||||
doorbells[i].mapping = NULL;
|
||||
pthread_mutex_init(&doorbells[i].mutex, NULL);
|
||||
queue_ctx->doorbells[i].use_gpuvm = false;
|
||||
queue_ctx->doorbells[i].size = 0;
|
||||
queue_ctx->doorbells[i].mapping = NULL;
|
||||
pthread_mutex_init(&queue_ctx->doorbells[i].mutex, NULL);
|
||||
}
|
||||
|
||||
num_doorbells = NumNodes;
|
||||
queue_ctx->num_doorbells = NumNodes;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -144,94 +166,105 @@ static void get_doorbell_map_info(uint32_t node_id,
|
||||
return;
|
||||
}
|
||||
|
||||
/* Release every doorbell mapping recorded in the table, then free the table
 * and reset the count. Does nothing when the table pointer is NULL.
 *
 * Entries whose size is 0 are skipped. Entries flagged use_gpuvm are
 * unmapped from the GPU and released through the FMM helpers; all other
 * entries are munmap()ed with their recorded size.
 *
 * This span is a rendered diff: changed statements appear twice, first
 * without ctx / with the file-scope variables, then in the queue_ctx form.
 *
 * NOTE(review): hsakmt_kfdcontext_get_queue_context() can return NULL and
 * queue_ctx is dereferenced without a check.
 * NOTE(review): no pthread_mutex_destroy() is visible here for the mutexes
 * that hsakmt_init_process_doorbells() initialises - confirm whether that
 * is intended.
 */
void hsakmt_destroy_process_doorbells(void)
|
||||
void hsakmt_destroy_process_doorbells(HsaKFDContext *ctx)
|
||||
{
|
||||
unsigned int i;
|
||||
struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
|
||||
struct process_doorbells *doorbells = queue_ctx->doorbells;
|
||||
|
||||
if (!doorbells)
|
||||
return;
|
||||
|
||||
for (i = 0; i < num_doorbells; i++) {
|
||||
for (i = 0; i < queue_ctx->num_doorbells; i++) {
|
||||
if (!doorbells[i].size)
|
||||
continue;
|
||||
|
||||
if (doorbells[i].use_gpuvm) {
|
||||
hsakmt_fmm_unmap_from_gpu(doorbells[i].mapping);
|
||||
hsakmt_fmm_release(doorbells[i].mapping);
|
||||
hsakmt_fmm_unmap_from_gpu(ctx, doorbells[i].mapping);
|
||||
hsakmt_fmm_release(ctx, doorbells[i].mapping);
|
||||
} else
|
||||
munmap(doorbells[i].mapping, doorbells[i].size);
|
||||
}
|
||||
|
||||
free(doorbells);
|
||||
doorbells = NULL;
|
||||
num_doorbells = 0;
|
||||
queue_ctx->doorbells = NULL;
|
||||
queue_ctx->num_doorbells = 0;
|
||||
}
|
||||
|
||||
/* This is a special function that should be called only from the child process
|
||||
* after a fork(). This will clear doorbells duplicated from the parent.
|
||||
* Only entries with a non-zero size that are not flagged use_gpuvm are
* munmap()ed; entries flagged use_gpuvm are left untouched. The table is
* then freed and the count reset to 0. Does nothing when the table pointer
* is NULL.
* This span is a rendered diff: changed statements appear twice, first with
* the file-scope variables, then in the queue_ctx form.
* NOTE(review): hsakmt_kfdcontext_get_queue_context() can return NULL and
* queue_ctx is dereferenced without a check.
*/
|
||||
void hsakmt_clear_process_doorbells(void)
|
||||
void hsakmt_clear_process_doorbells(HsaKFDContext *ctx)
|
||||
{
|
||||
unsigned int i;
|
||||
struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
|
||||
|
||||
if (!doorbells)
|
||||
if (!queue_ctx->doorbells)
|
||||
return;
|
||||
|
||||
for (i = 0; i < num_doorbells; i++) {
|
||||
if (!doorbells[i].size)
|
||||
for (i = 0; i < queue_ctx->num_doorbells; i++) {
|
||||
if (!queue_ctx->doorbells[i].size)
|
||||
continue;
|
||||
|
||||
if (!doorbells[i].use_gpuvm)
|
||||
munmap(doorbells[i].mapping, doorbells[i].size);
|
||||
if (!queue_ctx->doorbells[i].use_gpuvm)
|
||||
munmap(queue_ctx->doorbells[i].mapping, queue_ctx->doorbells[i].size);
|
||||
}
|
||||
|
||||
free(doorbells);
|
||||
doorbells = NULL;
|
||||
num_doorbells = 0;
|
||||
free(queue_ctx->doorbells);
|
||||
queue_ctx->doorbells = NULL;
|
||||
queue_ctx->num_doorbells = 0;
|
||||
}
|
||||
|
||||
/* Map the doorbell range of NodeId with a plain mmap() of the KFD file
 * descriptor: doorbells[NodeId].size bytes, read/write, MAP_SHARED, at
 * doorbell_mmap_offset. On success the address is stored in
 * doorbells[NodeId].mapping.
 *
 * gpu_id is not used in the body.
 * Returns HSAKMT_STATUS_ERROR when mmap() yields MAP_FAILED, otherwise
 * HSAKMT_STATUS_SUCCESS.
 *
 * This span is a rendered diff: changed statements appear twice, first with
 * the file-scope doorbells/hsakmt_kfd_fd, then with queue_ctx/ctx->fd.
 *
 * NOTE(review): queue_ctx may be NULL (see
 * hsakmt_kfdcontext_get_queue_context()) and NodeId is not range-checked
 * against num_doorbells here - presumably the caller guarantees both.
 */
static HSAKMT_STATUS map_doorbell_apu(HSAuint32 NodeId, HSAuint32 gpu_id,
|
||||
static HSAKMT_STATUS map_doorbell_apu(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId, HSAuint32 gpu_id,
|
||||
HSAuint64 doorbell_mmap_offset)
|
||||
{
|
||||
void *ptr;
|
||||
struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
|
||||
|
||||
ptr = mmap(0, doorbells[NodeId].size, PROT_READ|PROT_WRITE,
|
||||
MAP_SHARED, hsakmt_kfd_fd, doorbell_mmap_offset);
|
||||
ptr = mmap(0, queue_ctx->doorbells[NodeId].size, PROT_READ|PROT_WRITE,
|
||||
MAP_SHARED, ctx->fd, doorbell_mmap_offset);
|
||||
|
||||
if (ptr == MAP_FAILED)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
doorbells[NodeId].mapping = ptr;
|
||||
queue_ctx->doorbells[NodeId].mapping = ptr;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/* Map the doorbell range of NodeId through the FMM helpers: allocate a
 * doorbell region of doorbells[NodeId].size bytes for gpu_id at
 * doorbell_mmap_offset, then map it for GPU access. On success the address
 * is stored in doorbells[NodeId].mapping.
 *
 * Returns HSAKMT_STATUS_ERROR if the allocation returns NULL or if mapping
 * to the GPU reports failure (the allocation is released first in that
 * case); otherwise HSAKMT_STATUS_SUCCESS.
 *
 * This span is a rendered diff: changed statements appear twice, first
 * without ctx, then with ctx/queue_ctx.
 *
 * NOTE(review): queue_ctx may be NULL (see
 * hsakmt_kfdcontext_get_queue_context()) and is dereferenced without a
 * check.
 */
static HSAKMT_STATUS map_doorbell_dgpu(HSAuint32 NodeId, HSAuint32 gpu_id,
|
||||
static HSAKMT_STATUS map_doorbell_dgpu(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId, HSAuint32 gpu_id,
|
||||
HSAuint64 doorbell_mmap_offset)
|
||||
{
|
||||
void *ptr;
|
||||
struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
|
||||
|
||||
ptr = hsakmt_fmm_allocate_doorbell(gpu_id, doorbells[NodeId].size,
|
||||
ptr = hsakmt_fmm_allocate_doorbell(ctx,
|
||||
gpu_id, queue_ctx->doorbells[NodeId].size,
|
||||
doorbell_mmap_offset);
|
||||
|
||||
if (!ptr)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
/* map for GPU access */
|
||||
if (hsakmt_fmm_map_to_gpu(ptr, doorbells[NodeId].size, NULL)) {
|
||||
hsakmt_fmm_release(ptr);
|
||||
if (hsakmt_fmm_map_to_gpu(ctx, ptr, queue_ctx->doorbells[NodeId].size, NULL)) {
|
||||
hsakmt_fmm_release(ctx, ptr);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
doorbells[NodeId].mapping = ptr;
|
||||
queue_ctx->doorbells[NodeId].mapping = ptr;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS map_doorbell(HSAuint32 NodeId, HSAuint32 gpu_id,
|
||||
static HSAKMT_STATUS map_doorbell(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId, HSAuint32 gpu_id,
|
||||
HSAuint64 doorbell_mmap_offset)
|
||||
{
|
||||
HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;
|
||||
struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
|
||||
struct process_doorbells *doorbells = queue_ctx->doorbells;
|
||||
|
||||
pthread_mutex_lock(&doorbells[NodeId].mutex);
|
||||
if (doorbells[NodeId].size) {
|
||||
@@ -242,16 +275,16 @@ static HSAKMT_STATUS map_doorbell(HSAuint32 NodeId, HSAuint32 gpu_id,
|
||||
get_doorbell_map_info(NodeId, &doorbells[NodeId]);
|
||||
|
||||
if (doorbells[NodeId].use_gpuvm) {
|
||||
status = map_doorbell_dgpu(NodeId, gpu_id, doorbell_mmap_offset);
|
||||
status = map_doorbell_dgpu(ctx, NodeId, gpu_id, doorbell_mmap_offset);
|
||||
if (status != HSAKMT_STATUS_SUCCESS) {
|
||||
/* Fall back to the old method if KFD doesn't
|
||||
* support doorbells in GPUVM
|
||||
*/
|
||||
doorbells[NodeId].use_gpuvm = false;
|
||||
status = map_doorbell_apu(NodeId, gpu_id, doorbell_mmap_offset);
|
||||
status = map_doorbell_apu(ctx, NodeId, gpu_id, doorbell_mmap_offset);
|
||||
}
|
||||
} else
|
||||
status = map_doorbell_apu(NodeId, gpu_id, doorbell_mmap_offset);
|
||||
status = map_doorbell_apu(ctx, NodeId, gpu_id, doorbell_mmap_offset);
|
||||
|
||||
if (status != HSAKMT_STATUS_SUCCESS)
|
||||
doorbells[NodeId].size = 0;
|
||||
@@ -279,13 +312,13 @@ static void *allocate_exec_aligned_memory_cpu(uint32_t size)
|
||||
}
|
||||
|
||||
/* The bool return indicates whether the queue needs a context-save-restore area */
|
||||
static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
|
||||
static bool update_ctx_save_restore_size(HsaKFDContext *ctx, uint32_t nodeid, struct queue *q)
|
||||
{
|
||||
HsaNodeProperties node;
|
||||
|
||||
if (q->gfxv < GFX_VERSION_CARRIZO)
|
||||
return false;
|
||||
if (hsaKmtGetNodeProperties(nodeid, &node))
|
||||
if (hsaKmtGetNodePropertiesCtx(ctx, nodeid, &node))
|
||||
return false;
|
||||
if (node.NumFComputeCores && node.NumSIMDPerCU) {
|
||||
uint32_t ctl_stack_size, wg_data_size;
|
||||
@@ -316,7 +349,8 @@ static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
|
||||
return false;
|
||||
}
|
||||
|
||||
void *hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uint32_t gpu_id,
|
||||
void *hsakmt_allocate_exec_aligned_memory_gpu(HsaKFDContext *ctx,
|
||||
uint32_t size, uint32_t align, uint32_t gpu_id,
|
||||
uint32_t NodeId, bool nonPaged,
|
||||
bool DeviceLocal,
|
||||
bool Uncached)
|
||||
@@ -337,7 +371,7 @@ void *hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uin
|
||||
size = ALIGN_UP(size, align);
|
||||
|
||||
if (DeviceLocal && !hsakmt_zfb_support)
|
||||
mem = hsakmt_fmm_allocate_device(gpu_id, NodeId, mem, size, 0, flags);
|
||||
mem = hsakmt_fmm_allocate_device(ctx, gpu_id, NodeId, mem, size, 0, flags);
|
||||
else {
|
||||
/* VRAM under ZFB mode should be supported here without any
|
||||
* additional code
|
||||
@@ -352,7 +386,7 @@ void *hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uin
|
||||
cpu_id = 0;
|
||||
}
|
||||
}
|
||||
mem = hsakmt_fmm_allocate_host(gpu_id, cpu_id, mem, size, 0, flags);
|
||||
mem = hsakmt_fmm_allocate_host(ctx, gpu_id, cpu_id, mem, size, 0, flags);
|
||||
}
|
||||
|
||||
if (!mem) {
|
||||
@@ -366,35 +400,36 @@ void *hsakmt_allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uin
|
||||
HsaMemMapFlags map_flags = {0};
|
||||
HSAKMT_STATUS result;
|
||||
|
||||
result = hsaKmtMapMemoryToGPUNodes(mem, size, &gpu_va, map_flags, 1, nodes_array);
|
||||
result = hsaKmtMapMemoryToGPUNodesCtx(ctx, mem, size, &gpu_va, map_flags, 1, nodes_array);
|
||||
if (result != HSAKMT_STATUS_SUCCESS) {
|
||||
hsaKmtFreeMemory(mem, size);
|
||||
hsaKmtFreeMemoryCtx(ctx, mem, size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
if (hsaKmtMapMemoryToGPU(mem, size, &gpu_va) != HSAKMT_STATUS_SUCCESS) {
|
||||
hsaKmtFreeMemory(mem, size);
|
||||
if (hsaKmtMapMemoryToGPUCtx(ctx, mem, size, &gpu_va) != HSAKMT_STATUS_SUCCESS) {
|
||||
hsaKmtFreeMemoryCtx(ctx, mem, size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
/* Unmap addr from the GPU and, only if the unmap reports success, free it.
 * size is first rounded up to a multiple of align with ALIGN_UP and the
 * rounded value is what is passed to the free call.
 *
 * This span is a rendered diff: the unmap/free pair appears twice, first in
 * the form without ctx, then in the ...Ctx form.
 *
 * NOTE(review): when the unmap fails nothing is freed and, the function
 * being void, the failure is not reported to the caller.
 */
void hsakmt_free_exec_aligned_memory_gpu(void *addr, uint32_t size, uint32_t align)
|
||||
void hsakmt_free_exec_aligned_memory_gpu(HsaKFDContext *ctx, void *addr, uint32_t size, uint32_t align)
|
||||
{
|
||||
size = ALIGN_UP(size, align);
|
||||
|
||||
if (hsaKmtUnmapMemoryToGPU(addr) == HSAKMT_STATUS_SUCCESS)
|
||||
hsaKmtFreeMemory(addr, size);
|
||||
if (hsaKmtUnmapMemoryToGPUCtx(ctx, addr) == HSAKMT_STATUS_SUCCESS)
|
||||
hsaKmtFreeMemoryCtx(ctx, addr, size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocates memory aligned to sysconf(_SC_PAGESIZE)
|
||||
*/
|
||||
static void *allocate_exec_aligned_memory(uint32_t size,
|
||||
static void *allocate_exec_aligned_memory(HsaKFDContext *ctx,
|
||||
uint32_t size,
|
||||
bool use_ats,
|
||||
uint32_t gpu_id,
|
||||
uint32_t NodeId,
|
||||
@@ -403,17 +438,19 @@ static void *allocate_exec_aligned_memory(uint32_t size,
|
||||
bool Uncached)
|
||||
{
|
||||
if (!use_ats)
|
||||
return hsakmt_allocate_exec_aligned_memory_gpu(size, PAGE_SIZE, gpu_id, NodeId,
|
||||
return hsakmt_allocate_exec_aligned_memory_gpu(ctx,
|
||||
size, PAGE_SIZE, gpu_id, NodeId,
|
||||
nonPaged, DeviceLocal,
|
||||
Uncached);
|
||||
return allocate_exec_aligned_memory_cpu(size);
|
||||
}
|
||||
|
||||
/* Free a buffer according to use_ats: when use_ats is false the buffer is
 * handed to hsakmt_free_exec_aligned_memory_gpu(); when it is true the
 * buffer is munmap()ed with the unrounded size (align is not used on that
 * path).
 *
 * This span is a rendered diff: the signature and the GPU-path call appear
 * twice, first without ctx, then with it.
 */
static void free_exec_aligned_memory(void *addr, uint32_t size, uint32_t align,
|
||||
static void free_exec_aligned_memory(HsaKFDContext *ctx,
|
||||
void *addr, uint32_t size, uint32_t align,
|
||||
bool use_ats)
|
||||
{
|
||||
if (!use_ats)
|
||||
hsakmt_free_exec_aligned_memory_gpu(addr, size, align);
|
||||
hsakmt_free_exec_aligned_memory_gpu(ctx, addr, size, align);
|
||||
else
|
||||
munmap(addr, size);
|
||||
}
|
||||
@@ -454,20 +491,20 @@ static HSAKMT_STATUS register_svm_range(void *mem, uint32_t size,
|
||||
return hsaKmtSVMSetAttr(mem, size, nattr, attrs);
|
||||
}
|
||||
|
||||
/* Release the buffers owned by a queue and then the queue object itself.
 *
 * - eop_buffer, if set, is freed with free_exec_aligned_memory().
 * - ctx_save_restore is munmap()ed when unified_ctx_save_restore is set;
 *   otherwise, if non-NULL, it is freed with free_exec_aligned_memory().
 * - q itself is freed last; q->use_ats is read as an argument of that call,
 *   before the memory is released.
 *
 * This span is a rendered diff: each changed call appears twice, first
 * without ctx, then with it.
 *
 * NOTE(review): q is freed according to q->use_ats, while the visible
 * allocation in hsaKmtCreateQueueExtCtx() passes false for use_ats - confirm
 * that the two paths match whenever q->use_ats is true.
 */
static void free_queue(struct queue *q)
|
||||
static void free_queue(HsaKFDContext *ctx, struct queue *q)
|
||||
{
|
||||
if (q->eop_buffer)
|
||||
free_exec_aligned_memory(q->eop_buffer,
|
||||
free_exec_aligned_memory(ctx, q->eop_buffer,
|
||||
q->eop_buffer_size,
|
||||
PAGE_SIZE, q->use_ats);
|
||||
if (q->unified_ctx_save_restore)
|
||||
munmap(q->ctx_save_restore, q->total_mem_alloc_size);
|
||||
else if (q->ctx_save_restore)
|
||||
free_exec_aligned_memory(q->ctx_save_restore,
|
||||
free_exec_aligned_memory(ctx, q->ctx_save_restore,
|
||||
q->total_mem_alloc_size,
|
||||
PAGE_SIZE, q->use_ats);
|
||||
|
||||
free_exec_aligned_memory((void *)q, sizeof(*q), PAGE_SIZE, q->use_ats);
|
||||
free_exec_aligned_memory(ctx, (void *)q, sizeof(*q), PAGE_SIZE, q->use_ats);
|
||||
}
|
||||
|
||||
static inline void fill_cwsr_header(struct queue *q, void *addr,
|
||||
@@ -488,7 +525,8 @@ static inline void fill_cwsr_header(struct queue *q, void *addr,
|
||||
}
|
||||
}
|
||||
|
||||
static int handle_concrete_asic(struct queue *q,
|
||||
static int handle_concrete_asic(HsaKFDContext *ctx,
|
||||
struct queue *q,
|
||||
struct kfd_ioctl_create_queue_args *args,
|
||||
uint32_t gpu_id,
|
||||
uint32_t NodeId,
|
||||
@@ -503,7 +541,8 @@ static int handle_concrete_asic(struct queue *q,
|
||||
|
||||
if (q->eop_buffer_size > 0) {
|
||||
pr_info("Allocating VRAM for EOP\n");
|
||||
q->eop_buffer = allocate_exec_aligned_memory(q->eop_buffer_size,
|
||||
q->eop_buffer = allocate_exec_aligned_memory(ctx,
|
||||
q->eop_buffer_size,
|
||||
q->use_ats, gpu_id,
|
||||
NodeId, true, true, /* Unused for VRAM */false);
|
||||
if (!q->eop_buffer)
|
||||
@@ -513,12 +552,12 @@ static int handle_concrete_asic(struct queue *q,
|
||||
args->eop_buffer_size = q->eop_buffer_size;
|
||||
}
|
||||
|
||||
ret = update_ctx_save_restore_size(NodeId, q);
|
||||
ret = update_ctx_save_restore_size(ctx, NodeId, q);
|
||||
|
||||
if (ret) {
|
||||
HsaNodeProperties node;
|
||||
|
||||
if (hsaKmtGetNodeProperties(NodeId, &node))
|
||||
if (hsaKmtGetNodePropertiesCtx(ctx, NodeId, &node))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
args->ctx_save_restore_size = q->ctx_save_restore_size;
|
||||
@@ -568,7 +607,7 @@ static int handle_concrete_asic(struct queue *q,
|
||||
}
|
||||
|
||||
if (!q->unified_ctx_save_restore) {
|
||||
q->ctx_save_restore = allocate_exec_aligned_memory(
|
||||
q->ctx_save_restore = allocate_exec_aligned_memory(ctx,
|
||||
q->total_mem_alloc_size,
|
||||
q->use_ats, gpu_id, NodeId,
|
||||
false, false, false);
|
||||
@@ -591,24 +630,26 @@ static int handle_concrete_asic(struct queue *q,
|
||||
*/
|
||||
static uint32_t priority_map[] = {0, 3, 5, 7, 9, 11, 15};
|
||||
|
||||
/* Create a queue without naming an SDMA engine.
 *
 * Rejects Type == HSA_QUEUE_SDMA_BY_ENG_ID with HSAKMT_STATUS_ERROR; every
 * other type is forwarded to the ...Ext variant with 0 passed in the
 * SdmaEngineId position, and that call's status is returned.
 *
 * This span is a rendered diff: the first parameter list and the first
 * return statement belong to the form without ctx (hsaKmtCreateQueue), the
 * second ones to the context-aware form (hsaKmtCreateQueueCtx).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId,
|
||||
HSA_QUEUE_TYPE Type,
|
||||
HSAuint32 QueuePercentage,
|
||||
HSA_QUEUE_PRIORITY Priority,
|
||||
void *QueueAddress,
|
||||
HSAuint64 QueueSizeInBytes,
|
||||
HsaEvent *Event,
|
||||
HsaQueueResource *QueueResource)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSA_QUEUE_TYPE Type,
|
||||
HSAuint32 QueuePercentage,
|
||||
HSA_QUEUE_PRIORITY Priority,
|
||||
void *QueueAddress,
|
||||
HSAuint64 QueueSizeInBytes,
|
||||
HsaEvent *Event,
|
||||
HsaQueueResource *QueueResource)
|
||||
{
|
||||
if (Type == HSA_QUEUE_SDMA_BY_ENG_ID)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
return hsaKmtCreateQueueExt(NodeId, Type, QueuePercentage, Priority, 0,
|
||||
return hsaKmtCreateQueueExtCtx(ctx, NodeId, Type, QueuePercentage, Priority, 0,
|
||||
QueueAddress, QueueSizeInBytes, Event,
|
||||
QueueResource);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExtCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSA_QUEUE_TYPE Type,
|
||||
HSAuint32 QueuePercentage,
|
||||
HSA_QUEUE_PRIORITY Priority,
|
||||
@@ -628,6 +669,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
struct hsa_kfd_queue_context *queue_ctx = hsakmt_kfdcontext_get_queue_context(ctx);
|
||||
|
||||
if (Priority < HSA_QUEUE_PRIORITY_MINIMUM ||
|
||||
Priority > HSA_QUEUE_PRIORITY_MAXIMUM)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
@@ -636,7 +679,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
struct queue *q = allocate_exec_aligned_memory(sizeof(*q),
|
||||
struct queue *q = allocate_exec_aligned_memory(ctx, sizeof(*q),
|
||||
false, gpu_id, NodeId, true, false, true);
|
||||
if (!q)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
@@ -656,7 +699,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
|
||||
/* By default, CUs are all turned on. Initialize cu_mask to '1
|
||||
* for all CU bits.
|
||||
*/
|
||||
if (hsaKmtGetNodeProperties(NodeId, &props))
|
||||
if (hsaKmtGetNodePropertiesCtx(ctx, NodeId, &props))
|
||||
q->cu_mask_count = 0;
|
||||
else {
|
||||
cu_num = props.NumFComputeCores / props.NumSIMDPerCU;
|
||||
@@ -695,9 +738,9 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
|
||||
QueueResource->QueueWptrValue = (uintptr_t)&q->wptr;
|
||||
}
|
||||
|
||||
err = handle_concrete_asic(q, &args, gpu_id, NodeId, Event, QueueResource->ErrorReason);
|
||||
err = handle_concrete_asic(ctx, q, &args, gpu_id, NodeId, Event, QueueResource->ErrorReason);
|
||||
if (err != HSAKMT_STATUS_SUCCESS) {
|
||||
free_queue(q);
|
||||
free_queue(ctx, q);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -709,10 +752,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
|
||||
args.queue_priority = priority_map[Priority+3];
|
||||
args.sdma_engine_id = SdmaEngineId;
|
||||
|
||||
err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);
|
||||
err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_CREATE_QUEUE, &args);
|
||||
|
||||
if (err == -1) {
|
||||
free_queue(q);
|
||||
free_queue(ctx, q);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -737,20 +780,21 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
|
||||
doorbell_offset = q->queue_id * DOORBELL_SIZE(q->gfxv);
|
||||
}
|
||||
|
||||
err = map_doorbell(NodeId, gpu_id, doorbell_mmap_offset);
|
||||
err = map_doorbell(ctx, NodeId, gpu_id, doorbell_mmap_offset);
|
||||
if (err != HSAKMT_STATUS_SUCCESS) {
|
||||
hsaKmtDestroyQueue(q->queue_id);
|
||||
hsaKmtDestroyQueueCtx(ctx, q->queue_id);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
QueueResource->QueueId = PORT_VPTR_TO_UINT64(q);
|
||||
QueueResource->Queue_DoorBell = VOID_PTR_ADD(doorbells[NodeId].mapping,
|
||||
QueueResource->Queue_DoorBell = VOID_PTR_ADD(queue_ctx->doorbells[NodeId].mapping,
|
||||
doorbell_offset);
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueueCtx(HsaKFDContext *ctx,
|
||||
HSA_QUEUEID QueueId,
|
||||
HSAuint32 QueuePercentage,
|
||||
HSA_QUEUE_PRIORITY Priority,
|
||||
void *QueueAddress,
|
||||
@@ -774,7 +818,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
|
||||
arg.queue_percentage = QueuePercentage;
|
||||
arg.queue_priority = priority_map[Priority+3];
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_UPDATE_QUEUE, &arg);
|
||||
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_UPDATE_QUEUE, &arg);
|
||||
|
||||
if (err == -1)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
@@ -782,7 +826,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueueCtx(HsaKFDContext *ctx,
|
||||
HSA_QUEUEID QueueId)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
@@ -794,20 +839,21 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId)
|
||||
|
||||
args.queue_id = q->queue_id;
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_DESTROY_QUEUE, &args);
|
||||
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_DESTROY_QUEUE, &args);
|
||||
|
||||
if (err == -1) {
|
||||
pr_err("Failed to destroy queue: %s\n", strerror(errno));
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
free_queue(q);
|
||||
free_queue(ctx, q);
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
|
||||
HSAuint32 CUMaskCount,
|
||||
HSAuint32 *QueueCUMask)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMaskCtx(HsaKFDContext *ctx,
|
||||
HSA_QUEUEID QueueId,
|
||||
HSAuint32 CUMaskCount,
|
||||
HSAuint32 *QueueCUMask)
|
||||
{
|
||||
struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
|
||||
struct kfd_ioctl_set_cu_mask_args args = {0};
|
||||
@@ -821,7 +867,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
|
||||
args.num_cu_mask = CUMaskCount;
|
||||
args.cu_mask_ptr = (uintptr_t)QueueCUMask;
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);
|
||||
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_CU_MASK, &args);
|
||||
|
||||
if (err == -1)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
@@ -832,12 +878,9 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtGetQueueInfo(
|
||||
HSA_QUEUEID QueueId,
|
||||
HsaQueueInfo *QueueInfo
|
||||
)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetQueueInfoCtx(HsaKFDContext *ctx,
|
||||
HSA_QUEUEID QueueId,
|
||||
HsaQueueInfo *QueueInfo)
|
||||
{
|
||||
struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
|
||||
struct kfd_ioctl_get_queue_wave_state_args args = {0};
|
||||
@@ -853,7 +896,7 @@ hsaKmtGetQueueInfo(
|
||||
args.queue_id = q->queue_id;
|
||||
args.ctl_stack_address = (uintptr_t)q->ctx_save_restore;
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_QUEUE_WAVE_STATE, &args) < 0)
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_GET_QUEUE_WAVE_STATE, &args) < 0)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
QueueInfo->ControlStackTop = (void *)(args.ctl_stack_address +
|
||||
@@ -871,7 +914,8 @@ hsaKmtGetQueueInfo(
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandler(HSAuint32 Node,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandlerCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 Node,
|
||||
void *TrapHandlerBaseAddress,
|
||||
HSAuint64 TrapHandlerSizeInBytes,
|
||||
void *TrapBufferBaseAddress,
|
||||
@@ -891,7 +935,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandler(HSAuint32 Node,
|
||||
args.tba_addr = (uintptr_t)TrapHandlerBaseAddress;
|
||||
args.tma_addr = (uintptr_t)TrapBufferBaseAddress;
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_TRAP_HANDLER, &args);
|
||||
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_TRAP_HANDLER, &args);
|
||||
|
||||
return (err == -1) ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -921,12 +965,10 @@ uint32_t *hsakmt_convert_queue_ids(HSAuint32 NumQueues, HSA_QUEUEID *Queues)
|
||||
return queue_ids_ptr;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtAllocQueueGWS(
|
||||
HSA_QUEUEID QueueId,
|
||||
HSAuint32 nGWS,
|
||||
HSAuint32 *firstGWS)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocQueueGWSCtx(HsaKFDContext *ctx,
|
||||
HSA_QUEUEID QueueId,
|
||||
HSAuint32 nGWS,
|
||||
HSAuint32 *firstGWS)
|
||||
{
|
||||
struct kfd_ioctl_alloc_queue_gws_args args = {0};
|
||||
struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
|
||||
@@ -936,7 +978,7 @@ hsaKmtAllocQueueGWS(
|
||||
args.queue_id = (HSAuint32)q->queue_id;
|
||||
args.num_gws = nGWS;
|
||||
|
||||
int err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args);
|
||||
int err = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args);
|
||||
|
||||
if (!err && firstGWS)
|
||||
*firstGWS = args.first_gws;
|
||||
@@ -952,3 +994,85 @@ hsaKmtAllocQueueGWS(
|
||||
else
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/* Legacy queue-creation entry point (no context argument).
 *
 * Rejects Type == HSA_QUEUE_SDMA_BY_ENG_ID with HSAKMT_STATUS_ERROR;
 * otherwise forwards to hsaKmtCreateQueueExt() with 0 in the SdmaEngineId
 * position and returns that call's status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId,
|
||||
HSA_QUEUE_TYPE Type,
|
||||
HSAuint32 QueuePercentage,
|
||||
HSA_QUEUE_PRIORITY Priority,
|
||||
void *QueueAddress,
|
||||
HSAuint64 QueueSizeInBytes,
|
||||
HsaEvent *Event,
|
||||
HsaQueueResource *QueueResource)
|
||||
{
|
||||
if (Type == HSA_QUEUE_SDMA_BY_ENG_ID)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
return hsaKmtCreateQueueExt(NodeId, Type, QueuePercentage, Priority, 0,
|
||||
QueueAddress, QueueSizeInBytes, Event,
|
||||
QueueResource);
|
||||
}
|
||||
|
||||
/* Legacy entry point (no context argument): forwards every argument
 * unchanged to hsaKmtCreateQueueExtCtx() on hsakmt_primary_kfd_ctx and
 * returns its status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId,
|
||||
HSA_QUEUE_TYPE Type,
|
||||
HSAuint32 QueuePercentage,
|
||||
HSA_QUEUE_PRIORITY Priority,
|
||||
HSAuint32 SdmaEngineId,
|
||||
void *QueueAddress,
|
||||
HSAuint64 QueueSizeInBytes,
|
||||
HsaEvent *Event,
|
||||
HsaQueueResource *QueueResource)
|
||||
{
|
||||
|
||||
return hsaKmtCreateQueueExtCtx(&hsakmt_primary_kfd_ctx, NodeId, Type,
|
||||
QueuePercentage, Priority, SdmaEngineId, QueueAddress,
|
||||
QueueSizeInBytes, Event, QueueResource);
|
||||
}
|
||||
|
||||
/* Legacy entry point (no context argument): forwards every argument
 * unchanged to hsaKmtUpdateQueueCtx() on hsakmt_primary_kfd_ctx and returns
 * its status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
|
||||
HSAuint32 QueuePercentage,
|
||||
HSA_QUEUE_PRIORITY Priority,
|
||||
void *QueueAddress,
|
||||
HSAuint64 QueueSize,
|
||||
HsaEvent *Event)
|
||||
{
|
||||
return hsaKmtUpdateQueueCtx(&hsakmt_primary_kfd_ctx, QueueId, QueuePercentage,
|
||||
Priority, QueueAddress, QueueSize, Event);
|
||||
}
|
||||
|
||||
/* Legacy entry point (no context argument): forwards QueueId to
 * hsaKmtDestroyQueueCtx() on hsakmt_primary_kfd_ctx and returns its status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId)
|
||||
{
|
||||
return hsaKmtDestroyQueueCtx(&hsakmt_primary_kfd_ctx, QueueId);
|
||||
}
|
||||
|
||||
/* Legacy entry point (no context argument): forwards every argument
 * unchanged to hsaKmtSetQueueCUMaskCtx() on hsakmt_primary_kfd_ctx and
 * returns its status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
|
||||
HSAuint32 CUMaskCount,
|
||||
HSAuint32 *QueueCUMask)
|
||||
{
|
||||
return hsaKmtSetQueueCUMaskCtx(&hsakmt_primary_kfd_ctx, QueueId, CUMaskCount, QueueCUMask);
|
||||
}
|
||||
|
||||
/* Legacy entry point (no context argument): forwards QueueId and QueueInfo
 * to hsaKmtGetQueueInfoCtx() on hsakmt_primary_kfd_ctx and returns its
 * status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetQueueInfo(
|
||||
HSA_QUEUEID QueueId,
|
||||
HsaQueueInfo *QueueInfo)
|
||||
{
|
||||
return hsaKmtGetQueueInfoCtx(&hsakmt_primary_kfd_ctx, QueueId, QueueInfo);
|
||||
}
|
||||
|
||||
/* Legacy entry point (no context argument): forwards every argument
 * unchanged to hsaKmtSetTrapHandlerCtx() on hsakmt_primary_kfd_ctx and
 * returns its status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandler(HSAuint32 Node,
|
||||
void *TrapHandlerBaseAddress,
|
||||
HSAuint64 TrapHandlerSizeInBytes,
|
||||
void *TrapBufferBaseAddress,
|
||||
HSAuint64 TrapBufferSizeInBytes)
|
||||
{
|
||||
return hsaKmtSetTrapHandlerCtx(&hsakmt_primary_kfd_ctx, Node,
|
||||
TrapHandlerBaseAddress, TrapHandlerSizeInBytes,
|
||||
TrapBufferBaseAddress, TrapBufferSizeInBytes);
|
||||
}
|
||||
|
||||
/* Legacy entry point (no context argument): forwards every argument
 * unchanged to hsaKmtAllocQueueGWSCtx() on hsakmt_primary_kfd_ctx and
 * returns its status.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocQueueGWS(HSA_QUEUEID QueueId,
|
||||
HSAuint32 nGWS,
|
||||
HSAuint32 *firstGWS)
|
||||
{
|
||||
return hsaKmtAllocQueueGWSCtx(&hsakmt_primary_kfd_ctx, QueueId, nGWS, firstGWS);
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMAcquire(HSAuint32 PreferredNode)
|
||||
args.op = KFD_IOCTL_SPM_OP_ACQUIRE;
|
||||
args.gpu_id = gpu_id;
|
||||
|
||||
ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RLC_SPM, &args);
|
||||
ret = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RLC_SPM, &args);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -72,7 +72,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMSetDestBuffer(HSAuint32 PreferredNode,
|
||||
args.op = KFD_IOCTL_SPM_OP_SET_DEST_BUF;
|
||||
args.gpu_id = gpu_id;
|
||||
|
||||
ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RLC_SPM, &args);
|
||||
ret = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RLC_SPM, &args);
|
||||
|
||||
*SizeCopied = args.bytes_copied;
|
||||
*isSPMDataLoss = args.has_data_loss;
|
||||
@@ -96,7 +96,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSPMRelease(HSAuint32 PreferredNode)
|
||||
args.op = KFD_IOCTL_SPM_OP_RELEASE;
|
||||
args.gpu_id = gpu_id;
|
||||
|
||||
ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_RLC_SPM, &args);
|
||||
ret = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_RLC_SPM, &args);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -37,7 +37,8 @@
|
||||
/* Helper functions for calling KFD SVM ioctl */
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI
|
||||
hsaKmtSVMSetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
|
||||
hsaKmtSVMSetAttrCtx(HsaKFDContext *ctx,
|
||||
void *start_addr, HSAuint64 size, unsigned int nattr,
|
||||
HSA_SVM_ATTRIBUTE *attrs)
|
||||
{
|
||||
struct kfd_ioctl_svm_args *args;
|
||||
@@ -94,7 +95,7 @@ hsaKmtSVMSetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
|
||||
}
|
||||
|
||||
/* Driver does one copy_from_user, with extra attrs size */
|
||||
r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
|
||||
r = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
|
||||
if (r) {
|
||||
pr_debug("op set range attrs failed %s\n", strerror(errno));
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
@@ -104,7 +105,8 @@ hsaKmtSVMSetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI
|
||||
hsaKmtSVMGetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
|
||||
hsaKmtSVMGetAttrCtx(HsaKFDContext *ctx,
|
||||
void *start_addr, HSAuint64 size, unsigned int nattr,
|
||||
HSA_SVM_ATTRIBUTE *attrs)
|
||||
{
|
||||
struct kfd_ioctl_svm_args *args;
|
||||
@@ -150,7 +152,7 @@ hsaKmtSVMGetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
|
||||
}
|
||||
|
||||
/* Driver does one copy_from_user, with extra attrs size */
|
||||
r = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
|
||||
r = hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
|
||||
if (r) {
|
||||
pr_debug("op get range attrs failed %s\n", strerror(errno));
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
@@ -187,7 +189,7 @@ hsaKmtSVMGetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS
|
||||
hsaKmtSetGetXNACKMode(HSAint32 * enable)
|
||||
hsaKmtSetGetXNACKModeCtx(HsaKFDContext *ctx, HSAint32 * enable)
|
||||
{
|
||||
struct kfd_ioctl_set_xnack_mode_args args;
|
||||
|
||||
@@ -196,7 +198,7 @@ hsaKmtSetGetXNACKMode(HSAint32 * enable)
|
||||
|
||||
args.xnack_enabled = *enable;
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_XNACK_MODE, &args)) {
|
||||
if (hsakmt_ioctl(ctx->fd, AMDKFD_IOC_SET_XNACK_MODE, &args)) {
|
||||
if (errno == EPERM) {
|
||||
pr_debug("set mode not supported %s\n",
|
||||
strerror(errno));
|
||||
@@ -213,6 +215,40 @@ hsaKmtSetGetXNACKMode(HSAint32 * enable)
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI
|
||||
hsaKmtSetXNACKModeCtx(HsaKFDContext *ctx, HSAint32 enable)
|
||||
{
|
||||
return hsaKmtSetGetXNACKModeCtx(ctx, &enable);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI
|
||||
hsaKmtGetXNACKModeCtx(HsaKFDContext *ctx, HSAint32 * enable)
|
||||
{
|
||||
*enable = -1;
|
||||
return hsaKmtSetGetXNACKModeCtx(ctx, enable);
|
||||
}
|
||||
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI
|
||||
hsaKmtSVMSetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
|
||||
HSA_SVM_ATTRIBUTE *attrs)
|
||||
{
|
||||
return hsaKmtSVMSetAttrCtx(&hsakmt_primary_kfd_ctx, start_addr, size, nattr, attrs);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI
|
||||
hsaKmtSVMGetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
|
||||
HSA_SVM_ATTRIBUTE *attrs)
|
||||
{
|
||||
return hsaKmtSVMGetAttrCtx(&hsakmt_primary_kfd_ctx, start_addr, size, nattr, attrs);
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS
|
||||
hsaKmtSetGetXNACKMode(HSAint32 * enable)
|
||||
{
|
||||
return hsaKmtSetGetXNACKModeCtx(&hsakmt_primary_kfd_ctx, enable);
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI
|
||||
hsaKmtSetXNACKMode(HSAint32 enable)
|
||||
{
|
||||
|
||||
@@ -42,7 +42,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
|
||||
|
||||
args.gpu_id = gpu_id;
|
||||
|
||||
err = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
|
||||
err = hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
|
||||
if (err < 0) {
|
||||
result = HSAKMT_STATUS_ERROR;
|
||||
} else {
|
||||
|
||||
@@ -96,7 +96,7 @@ static const char *supported_processor_vendor_name[] = {
|
||||
"\n" // POWER requires a different search method
|
||||
};
|
||||
|
||||
static HSAKMT_STATUS topology_take_snapshot(void);
|
||||
static HSAKMT_STATUS topology_take_snapshot(HsaKFDContext *ctx);
|
||||
static void topology_drop_snapshot(void);
|
||||
|
||||
static const struct hsa_gfxip_table gfxip_lookup_table[] = {
|
||||
@@ -645,7 +645,8 @@ static HSAKMT_STATUS topology_sysfs_get_gpu_id(uint32_t sysfs_node_id, uint32_t
|
||||
* - if corresponding drm render node is not available.
|
||||
* - if node information is not accessible (EPERM)
|
||||
*/
|
||||
static HSAKMT_STATUS topology_sysfs_check_node_supported(uint32_t sysfs_node_id, bool *is_node_supported)
|
||||
static HSAKMT_STATUS topology_sysfs_check_node_supported(HsaKFDContext *ctx,
|
||||
uint32_t sysfs_node_id, bool *is_node_supported)
|
||||
{
|
||||
uint32_t gpu_id;
|
||||
FILE *fd;
|
||||
@@ -711,7 +712,7 @@ static HSAKMT_STATUS topology_sysfs_check_node_supported(uint32_t sysfs_node_id,
|
||||
}
|
||||
|
||||
/* Open DRM Render device */
|
||||
ret_value = hsakmt_open_drm_render_device(drm_render_minor);
|
||||
ret_value = hsakmt_open_drm_render_device(ctx, drm_render_minor);
|
||||
if (ret_value > 0)
|
||||
*is_node_supported = true;
|
||||
else if (ret_value != -ENOENT && ret_value != -EPERM)
|
||||
@@ -723,7 +724,8 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaSystemProperties *props)
|
||||
HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaKFDContext *ctx,
|
||||
HsaSystemProperties *props)
|
||||
{
|
||||
FILE *fd;
|
||||
char *read_buf, *p;
|
||||
@@ -800,7 +802,7 @@ HSAKMT_STATUS hsakmt_topology_sysfs_get_system_props(HsaSystemProperties *props)
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < num_sysfs_nodes; i++) {
|
||||
ret = topology_sysfs_check_node_supported(i, &is_node_supported);
|
||||
ret = topology_sysfs_check_node_supported(ctx, i, &is_node_supported);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
goto sysfs_parse_failed;
|
||||
if (is_node_supported)
|
||||
@@ -1631,7 +1633,8 @@ static HSAKMT_STATUS topology_map_sysfs_to_user_node_id(uint32_t sys_node_id, ui
|
||||
* If node_to specified by the @iolink_id is not accessible the function returns HSAKMT_STATUS_NOT_SUPPORTED.
|
||||
* If node_to is accessible, then node_to is mapped from sysfs_node to user_node and returns HSAKMT_STATUS_SUCCESS.
|
||||
*/
|
||||
static HSAKMT_STATUS topology_sysfs_get_iolink_props(uint32_t node_id,
|
||||
static HSAKMT_STATUS topology_sysfs_get_iolink_props(HsaKFDContext *ctx,
|
||||
uint32_t node_id,
|
||||
uint32_t iolink_id,
|
||||
HsaIoLinkProperties *props, bool p2pLink)
|
||||
{
|
||||
@@ -1693,7 +1696,7 @@ static HSAKMT_STATUS topology_sysfs_get_iolink_props(uint32_t node_id,
|
||||
uint32_t sysfs_node_id;
|
||||
|
||||
sysfs_node_id = (uint32_t)prop_val;
|
||||
ret = topology_sysfs_check_node_supported(sysfs_node_id, &is_node_supported);
|
||||
ret = topology_sysfs_check_node_supported(ctx, sysfs_node_id, &is_node_supported);
|
||||
if (!is_node_supported) {
|
||||
ret = HSAKMT_STATUS_NOT_SUPPORTED;
|
||||
memset(props, 0, sizeof(*props));
|
||||
@@ -1955,7 +1958,7 @@ try_alt_dir:
|
||||
}
|
||||
}
|
||||
|
||||
HSAKMT_STATUS topology_take_snapshot(void)
|
||||
HSAKMT_STATUS topology_take_snapshot(HsaKFDContext *ctx)
|
||||
{
|
||||
uint32_t gen_start, gen_end, i, mem_id, cache_id;
|
||||
HsaSystemProperties sys_props;
|
||||
@@ -1978,7 +1981,7 @@ retry:
|
||||
ret = topology_sysfs_get_generation(&gen_start);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
goto err;
|
||||
ret = hsakmt_topology_sysfs_get_system_props(&sys_props);
|
||||
ret = hsakmt_topology_sysfs_get_system_props(ctx, &sys_props);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
goto err;
|
||||
if (sys_props.NumNodes > 0) {
|
||||
@@ -2059,7 +2062,7 @@ retry:
|
||||
*/
|
||||
while (sys_link_id < num_ioLinks &&
|
||||
link_id < sys_props.NumNodes - 1) {
|
||||
ret = topology_sysfs_get_iolink_props(i, sys_link_id++,
|
||||
ret = topology_sysfs_get_iolink_props(ctx, i, sys_link_id++,
|
||||
&temp_props[i].link[link_id], false);
|
||||
if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
|
||||
continue;
|
||||
@@ -2080,7 +2083,7 @@ retry:
|
||||
*/
|
||||
while (sys_link_id < num_p2pLinks &&
|
||||
link_id < sys_props.NumNodes - 1) {
|
||||
ret = topology_sysfs_get_iolink_props(i, sys_link_id++,
|
||||
ret = topology_sysfs_get_iolink_props(ctx, i, sys_link_id++,
|
||||
&temp_props[i].link[link_id], true);
|
||||
if (ret == HSAKMT_STATUS_NOT_SUPPORTED) {
|
||||
continue;
|
||||
@@ -2179,7 +2182,8 @@ HSAKMT_STATUS hsakmt_gpuid_to_nodeid(uint32_t gpu_id, uint32_t *node_id)
|
||||
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemPropertiesCtx(HsaKFDContext *ctx,
|
||||
HsaSystemProperties *SystemProperties)
|
||||
{
|
||||
HSAKMT_STATUS err = HSAKMT_STATUS_SUCCESS;
|
||||
|
||||
@@ -2198,7 +2202,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *Syste
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = topology_take_snapshot();
|
||||
err = topology_take_snapshot(ctx);
|
||||
if (err != HSAKMT_STATUS_SUCCESS)
|
||||
goto out;
|
||||
|
||||
@@ -2207,11 +2211,11 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *Syste
|
||||
if (hsakmt_use_model)
|
||||
model_init();
|
||||
|
||||
err = hsakmt_fmm_init_process_apertures(g_system->NumNodes);
|
||||
err = hsakmt_fmm_init_process_apertures(ctx, g_system->NumNodes);
|
||||
if (err != HSAKMT_STATUS_SUCCESS)
|
||||
goto init_process_apertures_failed;
|
||||
|
||||
err = hsakmt_init_process_doorbells(g_system->NumNodes);
|
||||
err = hsakmt_init_process_doorbells(ctx, g_system->NumNodes);
|
||||
if (err != HSAKMT_STATUS_SUCCESS)
|
||||
goto init_doorbells_failed;
|
||||
|
||||
@@ -2220,7 +2224,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *Syste
|
||||
goto out;
|
||||
|
||||
init_doorbells_failed:
|
||||
hsakmt_fmm_destroy_process_apertures();
|
||||
hsakmt_fmm_destroy_process_apertures(ctx);
|
||||
init_process_apertures_failed:
|
||||
topology_drop_snapshot();
|
||||
|
||||
@@ -2229,12 +2233,12 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void)
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemPropertiesCtx(HsaKFDContext *ctx)
|
||||
{
|
||||
pthread_mutex_lock(&hsakmt_mutex);
|
||||
|
||||
hsakmt_destroy_process_doorbells();
|
||||
hsakmt_fmm_destroy_process_apertures();
|
||||
hsakmt_destroy_process_doorbells(ctx);
|
||||
hsakmt_fmm_destroy_process_apertures(ctx);
|
||||
topology_drop_snapshot();
|
||||
|
||||
pthread_mutex_unlock(&hsakmt_mutex);
|
||||
@@ -2252,7 +2256,9 @@ HSAKMT_STATUS hsakmt_topology_get_node_props(HSAuint32 NodeId,
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId,
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodePropertiesCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HsaNodeProperties *NodeProperties)
|
||||
{
|
||||
HSAKMT_STATUS err;
|
||||
@@ -2278,7 +2284,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId,
|
||||
NodeProperties->NumMemoryBanks += NUM_OF_DGPU_HEAPS;
|
||||
else
|
||||
NodeProperties->NumMemoryBanks += NUM_OF_IGPU_HEAPS;
|
||||
if (hsakmt_fmm_get_aperture_base_and_limit(FMM_MMIO, gpu_id, &base,
|
||||
if (hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_MMIO, gpu_id, &base,
|
||||
&limit) == HSAKMT_STATUS_SUCCESS)
|
||||
NodeProperties->NumMemoryBanks += 1;
|
||||
}
|
||||
@@ -2288,7 +2294,8 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryPropertiesCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSAuint32 NumBanks,
|
||||
HsaMemoryProperties *MemoryProperties)
|
||||
{
|
||||
@@ -2319,7 +2326,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
|
||||
|
||||
/*Add LDS*/
|
||||
if (i < NumBanks &&
|
||||
hsakmt_fmm_get_aperture_base_and_limit(FMM_LDS, gpu_id,
|
||||
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_LDS, gpu_id,
|
||||
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
|
||||
MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_LDS;
|
||||
MemoryProperties[i].SizeInBytes = g_props[NodeId].node.LDSSizeInKB * 1024;
|
||||
@@ -2332,7 +2339,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
|
||||
*/
|
||||
if (hsakmt_get_gfxv_by_node_id(NodeId) == GFX_VERSION_KAVERI && i < NumBanks &&
|
||||
g_props[NodeId].node.LocalMemSize > 0 &&
|
||||
hsakmt_fmm_get_aperture_base_and_limit(FMM_GPUVM, gpu_id,
|
||||
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_GPUVM, gpu_id,
|
||||
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
|
||||
MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
|
||||
MemoryProperties[i].SizeInBytes = g_props[NodeId].node.LocalMemSize;
|
||||
@@ -2341,7 +2348,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
|
||||
|
||||
/* Add SCRATCH */
|
||||
if (i < NumBanks &&
|
||||
hsakmt_fmm_get_aperture_base_and_limit(FMM_SCRATCH, gpu_id,
|
||||
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_SCRATCH, gpu_id,
|
||||
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
|
||||
MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_SCRATCH;
|
||||
MemoryProperties[i].SizeInBytes = (aperture_limit - MemoryProperties[i].VirtualBaseAddress) + 1;
|
||||
@@ -2350,7 +2357,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
|
||||
|
||||
/* Add SVM aperture */
|
||||
if (hsakmt_topology_is_svm_needed(g_props[NodeId].node.EngineId) && i < NumBanks &&
|
||||
hsakmt_fmm_get_aperture_base_and_limit(
|
||||
hsakmt_fmm_get_aperture_base_and_limit(ctx,
|
||||
FMM_SVM, gpu_id, &MemoryProperties[i].VirtualBaseAddress,
|
||||
&aperture_limit) == HSAKMT_STATUS_SUCCESS) {
|
||||
MemoryProperties[i].HeapType = HSA_HEAPTYPE_DEVICE_SVM;
|
||||
@@ -2360,7 +2367,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
|
||||
|
||||
/* Add mmio aperture */
|
||||
if (i < NumBanks &&
|
||||
hsakmt_fmm_get_aperture_base_and_limit(FMM_MMIO, gpu_id,
|
||||
hsakmt_fmm_get_aperture_base_and_limit(ctx, FMM_MMIO, gpu_id,
|
||||
&MemoryProperties[i].VirtualBaseAddress, &aperture_limit) == HSAKMT_STATUS_SUCCESS) {
|
||||
MemoryProperties[i].HeapType = HSA_HEAPTYPE_MMIO_REMAP;
|
||||
MemoryProperties[i].SizeInBytes = (aperture_limit - MemoryProperties[i].VirtualBaseAddress) + 1;
|
||||
@@ -2372,7 +2379,8 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCachePropertiesCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSAuint32 ProcessorId,
|
||||
HSAuint32 NumCaches,
|
||||
HsaCacheProperties *CacheProperties)
|
||||
@@ -2422,7 +2430,8 @@ HSAKMT_STATUS hsakmt_topology_get_iolink_props(HSAuint32 NodeId,
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkPropertiesCtx(HsaKFDContext *ctx,
|
||||
HSAuint32 NodeId,
|
||||
HSAuint32 NumIoLinks,
|
||||
HsaIoLinkProperties *IoLinkProperties)
|
||||
{
|
||||
@@ -2536,3 +2545,43 @@ inline uint32_t hsakmt_get_num_sysfs_nodes(void)
|
||||
{
|
||||
return num_sysfs_nodes;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Legacy (context-less) entry point for acquiring system properties.
 *
 * Delegates to hsaKmtAcquireSystemPropertiesCtx() on the primary KFD
 * context and returns its status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtAcquireSystemPropertiesCtx(primary, SystemProperties);
}
|
||||
|
||||
/*
 * Legacy (context-less) entry point for releasing system properties.
 *
 * Delegates to hsaKmtReleaseSystemPropertiesCtx() on the primary KFD
 * context and returns its status unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void)
{
	HSAKMT_STATUS status;

	status = hsaKmtReleaseSystemPropertiesCtx(&hsakmt_primary_kfd_ctx);
	return status;
}
|
||||
|
||||
/*
 * Legacy (context-less) entry point for reading a node's properties.
 *
 * Delegates to hsaKmtGetNodePropertiesCtx() on the primary KFD context;
 * NodeId and the output structure are passed through unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeProperties(HSAuint32 NodeId,
						HsaNodeProperties *NodeProperties)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtGetNodePropertiesCtx(primary, NodeId, NodeProperties);
}
|
||||
|
||||
/*
 * Legacy (context-less) entry point for reading a node's memory banks.
 *
 * Delegates to hsaKmtGetNodeMemoryPropertiesCtx() on the primary KFD
 * context and returns its status; NumBanks and the output array are handed
 * to that call as-is.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId,
						      HSAuint32 NumBanks,
						      HsaMemoryProperties *MemoryProperties)
{
	HSAKMT_STATUS status;

	status = hsaKmtGetNodeMemoryPropertiesCtx(&hsakmt_primary_kfd_ctx,
						  NodeId, NumBanks,
						  MemoryProperties);
	return status;
}
|
||||
|
||||
/*
 * Legacy (context-less) entry point for reading a node's cache properties.
 *
 * Delegates to hsaKmtGetNodeCachePropertiesCtx() on the primary KFD
 * context; all arguments are passed through unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties(HSAuint32 NodeId,
						     HSAuint32 ProcessorId,
						     HSAuint32 NumCaches,
						     HsaCacheProperties *CacheProperties)
{
	HsaKFDContext *primary = &hsakmt_primary_kfd_ctx;

	return hsaKmtGetNodeCachePropertiesCtx(primary, NodeId, ProcessorId,
					       NumCaches, CacheProperties);
}
|
||||
|
||||
/*
 * Legacy (context-less) entry point for reading a node's IO link properties.
 *
 * Delegates to hsaKmtGetNodeIoLinkPropertiesCtx() on the primary KFD
 * context and returns its status; NumIoLinks and the output array are
 * handed to that call as-is.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId,
						      HSAuint32 NumIoLinks,
						      HsaIoLinkProperties *IoLinkProperties)
{
	HSAKMT_STATUS status;

	status = hsaKmtGetNodeIoLinkPropertiesCtx(&hsakmt_primary_kfd_ctx,
						  NodeId, NumIoLinks,
						  IoLinkProperties);
	return status;
}
|
||||
|
||||
@@ -43,7 +43,7 @@ HSAKMT_STATUS hsakmt_init_kfd_version(void)
|
||||
{
|
||||
struct kfd_ioctl_get_version_args args = {0};
|
||||
|
||||
if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_GET_VERSION, &args) == -1)
|
||||
if (hsakmt_ioctl(hsakmt_primary_kfd_ctx.fd, AMDKFD_IOC_GET_VERSION, &args) == -1)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
hsakmt_kfd_version_info.KernelInterfaceMajorVersion = args.major_version;
|
||||
|
||||
Reference in New Issue
Block a user