2
0
Ficheiros
rocm-systems/src/queues.c
T
Yong Zhao 55bb61ff9c Revert "Workaround: make mmap memory resident for gfx902"
This reverts commit 716755b1de.

Change-Id: I9f4f0b6b426aeae4cb652b33cf0d4c0f57270ca5
Signed-off-by: Yong Zhao <yong.zhao@amd.com>
2018-02-02 12:31:06 -05:00

752 linhas
19 KiB
C

/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
#include "fmm.h"
#include "linux/kfd_ioctl.h"
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <math.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
/*
 * Doorbell geometry: a doorbell page holds 1024 doorbells, and a single
 * doorbell is 4 or 8 bytes wide depending on the ASIC generation.
 */
/* Width in bytes of one doorbell, per GFX generation. */
#define DOORBELL_SIZE_GFX7 4
#define DOORBELL_SIZE_GFX8 4
#define DOORBELL_SIZE_GFX9 8
/* Size in bytes of a doorbell page whose doorbells are `ds` bytes wide. */
#define DOORBELLS_PAGE_SIZE(ds) (1024 * (ds))
/*
 * Inputs to update_ctx_save_restore_size(): the workgroup context data
 * size that is multiplied by the CU count, and the waves-per-CU factor that
 * enters the control stack size.
 */
#define WG_CONTEXT_DATA_SIZE_PER_CU_VI 344576
#define WAVES_PER_CU_VI 32
/* Per-ASIC parameters consulted when creating queues and mapping doorbells. */
struct device_info {
/* ASIC family this entry describes. */
enum asic_family_type asic_family;
/* Size in bytes of the per-queue EOP buffer; 0 means none is allocated. */
uint32_t eop_buffer_size;
/* Width in bytes of a single doorbell (one of DOORBELL_SIZE_GFX*). */
uint32_t doorbell_size;
};
/*
 * One device_info object per supported ASIC family. They are referenced
 * through dev_lookup_table[]. Note that these objects are not declared
 * static, so they have external linkage.
 *
 * Kaveri and Hawaii use no EOP buffer (size 0); the remaining families use
 * either a fixed 4096-byte buffer or TONGA_PAGE_SIZE.
 */
struct device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.eop_buffer_size = 0,
.doorbell_size = DOORBELL_SIZE_GFX7,
};
struct device_info hawaii_device_info = {
.asic_family = CHIP_HAWAII,
.eop_buffer_size = 0,
.doorbell_size = DOORBELL_SIZE_GFX7,
};
struct device_info carrizo_device_info = {
.asic_family = CHIP_CARRIZO,
.eop_buffer_size = 4096,
.doorbell_size = DOORBELL_SIZE_GFX8,
};
struct device_info tonga_device_info = {
.asic_family = CHIP_TONGA,
.eop_buffer_size = TONGA_PAGE_SIZE,
.doorbell_size = DOORBELL_SIZE_GFX8,
};
struct device_info fiji_device_info = {
.asic_family = CHIP_FIJI,
.eop_buffer_size = TONGA_PAGE_SIZE,
.doorbell_size = DOORBELL_SIZE_GFX8,
};
struct device_info polaris10_device_info = {
.asic_family = CHIP_POLARIS10,
.eop_buffer_size = TONGA_PAGE_SIZE,
.doorbell_size = DOORBELL_SIZE_GFX8,
};
struct device_info polaris11_device_info = {
.asic_family = CHIP_POLARIS11,
.eop_buffer_size = TONGA_PAGE_SIZE,
.doorbell_size = DOORBELL_SIZE_GFX8,
};
struct device_info vega10_device_info = {
.asic_family = CHIP_VEGA10,
.eop_buffer_size = 4096,
.doorbell_size = DOORBELL_SIZE_GFX9,
};
struct device_info raven_device_info = {
.asic_family = CHIP_RAVEN,
.eop_buffer_size = 4096,
.doorbell_size = DOORBELL_SIZE_GFX9,
};
/*
 * Maps an asic_family_type value to its device_info. The table is indexed
 * directly by the enum value; its length is the largest listed index plus
 * one, and any slot inside that range that is not listed below is NULL.
 */
static struct device_info *dev_lookup_table[] = {
[CHIP_KAVERI] = &kaveri_device_info,
[CHIP_HAWAII] = &hawaii_device_info,
[CHIP_CARRIZO] = &carrizo_device_info,
[CHIP_TONGA] = &tonga_device_info,
[CHIP_FIJI] = &fiji_device_info,
[CHIP_POLARIS10] = &polaris10_device_info,
[CHIP_POLARIS11] = &polaris11_device_info,
[CHIP_VEGA10] = &vega10_device_info,
[CHIP_RAVEN] = &raven_device_info
};
/*
 * Pairs a 16-bit device ID with its device_info.
 * NOTE(review): no use of this type is visible in this part of the file;
 * confirm whether it is still needed.
 */
struct device_id {
uint16_t dev_id;
struct device_info *dev_info;
};
/*
 * User-mode bookkeeping for one queue. The address of this structure is
 * what hsaKmtCreateQueue() hands back to the caller as the queue handle.
 */
struct queue {
/* Queue ID returned by the create-queue ioctl. */
uint32_t queue_id;
/* Write/read pointer storage whose addresses are given to KFD for non-AQL queues. */
uint64_t wptr;
uint64_t rptr;
/* EOP buffer, or NULL when the ASIC does not use one. */
void *eop_buffer;
/* Context save/restore area, or NULL when none was allocated. */
void *ctx_save_restore;
/* Sizes in bytes computed by update_ctx_save_restore_size(). */
uint32_t ctx_save_restore_size;
uint32_t ctl_stack_size;
/* Per-ASIC parameters of the device that owns this queue. */
const struct device_info *dev_info;
/* This queue structure is allocated from GPU with page aligned size
 * but only small bytes are used. We use the extra space in the end for
 * cu_mask bits array.
 */
uint32_t cu_mask_count; /* in bits */
/* Trailing CU mask words; storage comes from the page padding described above. */
uint32_t cu_mask[0];
};
/* Doorbell mapping state for one topology node. */
struct process_doorbells {
/* true: mapped through fmm (map_doorbell_dgpu); false: plain mmap on kfd_fd. */
bool use_gpuvm;
/* Size in bytes of the doorbell mapping; 0 means "not mapped yet". */
uint32_t size;
/* Base address of the mapped doorbell page, or NULL. */
void *doorbells;
/* Serializes the first-time mapping done in map_doorbell(). */
pthread_mutex_t doorbells_mutex;
};
/* Number of entries in doorbells[]; set by init_process_doorbells(). */
static unsigned int num_doorbells;
/* Per-node doorbell state, indexed by topology NodeId; NULL until initialized. */
static struct process_doorbells *doorbells;
HSAKMT_STATUS init_process_doorbells(unsigned int NumNodes)
{
unsigned int i;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
/* doorbells[] is accessed using Topology NodeId. This means doorbells[0],
* which corresponds to CPU only Node, might not be used
*/
doorbells = malloc(NumNodes * sizeof(struct process_doorbells));
if (!doorbells)
return HSAKMT_STATUS_NO_MEMORY;
for (i = 0; i < NumNodes; i++) {
doorbells[i].use_gpuvm = false;
doorbells[i].size = 0;
doorbells[i].doorbells = NULL;
pthread_mutex_init(&doorbells[i].doorbells_mutex, NULL);
}
num_doorbells = NumNodes;
return ret;
}
/*
 * Look up the device_info that belongs to a device ID.
 *
 * dev_id: device ID, resolved to an ASIC family via
 *         topology_get_asic_family().
 *
 * Returns the matching dev_lookup_table[] entry, or NULL when the ASIC
 * family cannot be determined, lies outside the table, or has no entry.
 */
static struct device_info *get_device_info_by_dev_id(uint16_t dev_id)
{
	const size_t table_len =
		sizeof(dev_lookup_table) / sizeof(dev_lookup_table[0]);
	enum asic_family_type asic;

	if (topology_get_asic_family(dev_id, &asic) != HSAKMT_STATUS_SUCCESS)
		return NULL;

	/* The table only covers the families listed in its initializer; a
	 * family value beyond that would index past the end of the array.
	 * The cast also turns a negative value into an out-of-range index.
	 */
	if ((size_t)asic >= table_len)
		return NULL;

	return dev_lookup_table[asic];
}
static void get_doorbell_map_info(uint16_t dev_id,
struct process_doorbells *doorbell)
{
struct device_info *dev_info;
dev_info = get_device_info_by_dev_id(dev_id);
/*
* GPUVM doorbell on Tonga requires a workaround for VM TLB ACTIVE bit
* lookup bug. Remove ASIC check when this is implemented in amdgpu.
*/
doorbell->use_gpuvm = (topology_is_dgpu(dev_id) &&
dev_info->asic_family != CHIP_TONGA);
doorbell->size = DOORBELLS_PAGE_SIZE(dev_info->doorbell_size);
}
void destroy_process_doorbells(void)
{
unsigned int i;
if (!doorbells)
return;
for (i = 0; i < num_doorbells; i++) {
if (!doorbells[i].size)
continue;
if (doorbells[i].use_gpuvm) {
fmm_unmap_from_gpu(doorbells[i].doorbells);
fmm_release(doorbells[i].doorbells);
} else
munmap(doorbells[i].doorbells, doorbells[i].size);
}
free(doorbells);
doorbells = NULL;
num_doorbells = 0;
}
/* This is a special funcion that should be called only from the child process
* after a fork(). This will clear doorbells duplicated from the parent.
*/
void clear_process_doorbells(void)
{
unsigned int i;
if (!doorbells)
return;
for (i = 0; i < num_doorbells; i++) {
if (!doorbells[i].size)
continue;
if (!doorbells[i].use_gpuvm)
munmap(doorbells[i].doorbells, doorbells[i].size);
}
free(doorbells);
doorbells = NULL;
num_doorbells = 0;
}
/*
 * Map the doorbell page of a node with a plain shared mmap on kfd_fd.
 *
 * NodeId:          index into doorbells[]; the entry's size must be set.
 * gpu_id:          unused here; kept for a signature parallel to
 *                  map_doorbell_dgpu().
 * doorbell_offset: mmap offset of the doorbell page.
 *
 * Returns HSAKMT_STATUS_SUCCESS and records the mapping in the entry, or
 * HSAKMT_STATUS_ERROR when mmap fails.
 */
static HSAKMT_STATUS map_doorbell_apu(HSAuint32 NodeId, HSAuint32 gpu_id,
				      HSAuint64 doorbell_offset)
{
	void *mapping = mmap(0, doorbells[NodeId].size,
			     PROT_READ | PROT_WRITE, MAP_SHARED,
			     kfd_fd, doorbell_offset);

	if (mapping != MAP_FAILED) {
		doorbells[NodeId].doorbells = mapping;
		return HSAKMT_STATUS_SUCCESS;
	}

	return HSAKMT_STATUS_ERROR;
}
/*
 * Map the doorbell page of a node through the fmm allocator and make it
 * accessible to the GPU.
 *
 * NodeId:          index into doorbells[]; the entry's size must be set.
 * gpu_id:          GPU the doorbell allocation is made for.
 * doorbell_offset: offset of the doorbell page passed on to fmm.
 *
 * Returns HSAKMT_STATUS_SUCCESS and records the mapping in the entry, or
 * HSAKMT_STATUS_ERROR when allocation or GPU mapping fails (the
 * allocation is released again in the latter case).
 */
static HSAKMT_STATUS map_doorbell_dgpu(HSAuint32 NodeId, HSAuint32 gpu_id,
				       HSAuint64 doorbell_offset)
{
	void *va = fmm_allocate_doorbell(gpu_id, doorbells[NodeId].size,
					 doorbell_offset);

	if (va == NULL)
		return HSAKMT_STATUS_ERROR;

	/* The GPU needs its own mapping of the doorbell page. */
	if (fmm_map_to_gpu(va, doorbells[NodeId].size, NULL)) {
		fmm_release(va);
		return HSAKMT_STATUS_ERROR;
	}

	doorbells[NodeId].doorbells = va;
	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Map the doorbell page of a node on first use.
 *
 * The per-node mutex is held for the whole operation. A non-zero size in
 * the node's entry means the page is already mapped and nothing is done.
 * Otherwise the mapping parameters are looked up and the page is mapped
 * via the dGPU or the APU path; on failure the size is reset to 0 so a
 * later call tries again.
 *
 * Returns HSAKMT_STATUS_SUCCESS when the page is (already) mapped, or the
 * status reported by the mapping helper.
 */
static HSAKMT_STATUS map_doorbell(HSAuint32 NodeId, HSAuint32 gpu_id,
				  HSAuint64 doorbell_offset)
{
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;

	pthread_mutex_lock(&doorbells[NodeId].doorbells_mutex);

	if (doorbells[NodeId].size == 0) {
		get_doorbell_map_info(get_device_id_by_node(NodeId),
				      &doorbells[NodeId]);

		status = doorbells[NodeId].use_gpuvm ?
			map_doorbell_dgpu(NodeId, gpu_id, doorbell_offset) :
			map_doorbell_apu(NodeId, gpu_id, doorbell_offset);

		if (status != HSAKMT_STATUS_SUCCESS)
			doorbells[NodeId].size = 0;
	}

	pthread_mutex_unlock(&doorbells[NodeId].doorbells_mutex);
	return status;
}
/*
 * Allocate readable, writable and executable anonymous memory with mmap.
 *
 * The returned address is aligned to sysconf(_SC_PAGESIZE), as is every
 * mmap result, and MAP_ANONYMOUS memory starts out zero-filled.
 *
 * size: number of bytes to map.
 *
 * Returns the mapping, or NULL when mmap fails.
 */
static void *allocate_exec_aligned_memory_cpu(uint32_t size)
{
	const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
	const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
	void *region = mmap(NULL, size, prot, flags, -1, 0);

	return (region == MAP_FAILED) ? NULL : region;
}
/*
 * Compute the control stack and context save/restore sizes of a queue.
 *
 * nodeid: topology node the queue belongs to.
 * q:      queue to update; ctl_stack_size and ctx_save_restore_size are
 *         written only when true is returned.
 *
 * Returns true when the queue needs a context save/restore area. Returns
 * false for ASIC families before CHIP_CARRIZO, when the node properties
 * cannot be read, or when they report zero compute cores or zero SIMDs
 * per CU.
 */
static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
{
	HsaNodeProperties props;
	uint32_t num_cus, stack_bytes, wg_bytes;

	if (q->dev_info->asic_family < CHIP_CARRIZO)
		return false;

	if (hsaKmtGetNodeProperties(nodeid, &props))
		return false;

	if (!props.NumFComputeCores || !props.NumSIMDPerCU)
		return false;

	num_cus = props.NumFComputeCores / props.NumSIMDPerCU;
	stack_bytes = num_cus * WAVES_PER_CU_VI * 8 + 8;
	wg_bytes = num_cus * WG_CONTEXT_DATA_SIZE_PER_CU_VI;

	/* Both parts are rounded up to whole pages individually. */
	q->ctl_stack_size = PAGE_ALIGN_UP(stack_bytes);
	q->ctx_save_restore_size = q->ctl_stack_size + PAGE_ALIGN_UP(wg_bytes);
	return true;
}
/*
 * Allocate host-accessible, executable memory through the HSA KMT memory
 * API and map it to the GPU.
 *
 * size:     requested size in bytes; rounded up to a multiple of align.
 * align:    rounding granularity for size.
 * NodeId:   when non-zero, the memory is additionally registered to this
 *           node before it is mapped.
 * nonPaged: value for the NonPaged allocation flag.
 *
 * Returns the allocation, or NULL when allocating, registering or
 * mapping fails; the memory is freed again on the latter two failures.
 */
void *allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align,
				       uint32_t NodeId, bool nonPaged)
{
	void *buf;
	HSAuint64 gpu_addr;
	HsaMemFlags mem_flags;

	mem_flags.Value = 0;
	mem_flags.ui32.HostAccess = 1;
	mem_flags.ui32.ExecuteAccess = 1;
	mem_flags.ui32.NonPaged = nonPaged;
	mem_flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;

	size = ALIGN_UP(size, align);

	/* The allocation itself is always requested on node 0. */
	if (hsaKmtAllocMemory(0, size, mem_flags, &buf) != HSAKMT_STATUS_SUCCESS)
		return NULL;

	if (NodeId != 0) {
		uint32_t target_nodes[1] = {NodeId};

		if (hsaKmtRegisterMemoryToNodes(buf, size, 1, target_nodes) !=
		    HSAKMT_STATUS_SUCCESS)
			goto release;
	}

	if (hsaKmtMapMemoryToGPU(buf, size, &gpu_addr) != HSAKMT_STATUS_SUCCESS)
		goto release;

	return buf;

release:
	hsaKmtFreeMemory(buf, size);
	return NULL;
}
/*
 * Release memory obtained from allocate_exec_aligned_memory_gpu().
 *
 * addr:  start of the allocation.
 * size:  size originally requested; rounded up to a multiple of align
 *        here, mirroring the allocation path.
 * align: rounding granularity for size.
 *
 * The memory is freed only if unmapping it from the GPU succeeds.
 */
void free_exec_aligned_memory_gpu(void *addr, uint32_t size, uint32_t align)
{
	size = ALIGN_UP(size, align);

	if (hsaKmtUnmapMemoryToGPU(addr) != HSAKMT_STATUS_SUCCESS)
		return;

	hsaKmtFreeMemory(addr, size);
}
/*
 * Allocate executable memory for a queue-related buffer.
 *
 * size:   number of bytes needed.
 * type:   ASIC family; selects the allocator. For IS_DGPU() families the
 *         HSA KMT allocator is used with PAGE_SIZE rounding and
 *         nonPaged == false, otherwise an anonymous mmap is used.
 * NodeId: node passed on to the GPU allocator; unused on the mmap path.
 *
 * Returns the allocation, or NULL on failure.
 */
static void *allocate_exec_aligned_memory(uint32_t size,
					  enum asic_family_type type,
					  uint32_t NodeId)
{
	if (!IS_DGPU(type))
		return allocate_exec_aligned_memory_cpu(size);

	return allocate_exec_aligned_memory_gpu(size, PAGE_SIZE, NodeId,
						false);
}
/*
 * Counterpart of allocate_exec_aligned_memory().
 *
 * addr:  start of the allocation.
 * size:  size that was requested when allocating.
 * align: rounding granularity; only used on the dGPU path.
 * type:  ASIC family; must match the one used for the allocation so the
 *        same backend (HSA KMT vs. munmap) is chosen.
 */
static void free_exec_aligned_memory(void *addr, uint32_t size, uint32_t align,
				     enum asic_family_type type)
{
	if (!IS_DGPU(type)) {
		munmap(addr, size);
		return;
	}

	free_exec_aligned_memory_gpu(addr, size, align);
}
static void free_queue(struct queue *q)
{
if (q->eop_buffer)
free_exec_aligned_memory(q->eop_buffer,
q->dev_info->eop_buffer_size,
PAGE_SIZE, q->dev_info->asic_family);
if (q->ctx_save_restore)
free_exec_aligned_memory(q->ctx_save_restore,
q->ctx_save_restore_size,
PAGE_SIZE, q->dev_info->asic_family);
free_exec_aligned_memory((void *)q, sizeof(*q), PAGE_SIZE, q->dev_info->asic_family);
}
/*
 * Allocate the ASIC-specific buffers of a queue and describe them in the
 * create-queue ioctl arguments.
 *
 * q:      queue being created; eop_buffer, ctx_save_restore and the
 *         related size fields are filled in as needed.
 * args:   ioctl arguments; args->queue_type must already be set.
 * NodeId: node the buffers are allocated for.
 *
 * An EOP buffer is allocated when the device defines a non-zero size. A
 * context save/restore area is allocated for non-SDMA queues when
 * update_ctx_save_restore_size() says one is needed.
 *
 * Returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_NO_MEMORY when an
 * allocation fails. Buffers allocated before the failure stay attached to
 * q for the caller to release.
 */
static int handle_concrete_asic(struct queue *q,
				struct kfd_ioctl_create_queue_args *args,
				uint32_t NodeId)
{
	const struct device_info *info = q->dev_info;

	if (!info)
		return HSAKMT_STATUS_SUCCESS;

	if (info->eop_buffer_size > 0) {
		q->eop_buffer = allocate_exec_aligned_memory(info->eop_buffer_size,
							     info->asic_family,
							     NodeId);
		if (!q->eop_buffer)
			return HSAKMT_STATUS_NO_MEMORY;

		args->eop_buffer_address = (uintptr_t)q->eop_buffer;
		args->eop_buffer_size = info->eop_buffer_size;
	}

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		return HSAKMT_STATUS_SUCCESS;

	if (!update_ctx_save_restore_size(NodeId, q))
		return HSAKMT_STATUS_SUCCESS;

	/* The sizes are reported to KFD before the area is allocated. */
	args->ctx_save_restore_size = q->ctx_save_restore_size;
	args->ctl_stack_size = q->ctl_stack_size;

	q->ctx_save_restore = allocate_exec_aligned_memory(q->ctx_save_restore_size,
							   info->asic_family,
							   NodeId);
	if (!q->ctx_save_restore)
		return HSAKMT_STATUS_NO_MEMORY;

	args->ctx_save_restore_address = (uintptr_t)q->ctx_save_restore;
	return HSAKMT_STATUS_SUCCESS;
}
/* A map to translate thunk queue priority (-3 to +3)
 * to KFD queue priority (0 to 15).
 * Indexed by thunk_queue_priority+3, so -3 maps to 0, 0 maps to 7 and
 * +3 maps to 15. Callers range-check the priority before indexing.
 */
static uint32_t priority_map[] = {0, 3, 5, 7, 9, 11, 15};
/*
 * Create a queue on a node and map its doorbell.
 *
 * NodeId:           topology node the queue is created on.
 * Type:             HSA_QUEUE_COMPUTE, HSA_QUEUE_SDMA or
 *                   HSA_QUEUE_COMPUTE_AQL.
 * QueuePercentage:  passed through to KFD.
 * Priority:         thunk priority, HSA_QUEUE_PRIORITY_MINIMUM..MAXIMUM.
 * QueueAddress:     ring buffer base address.
 * QueueSizeInBytes: ring buffer size.
 * Event:            not used by this implementation.
 * QueueResource:    in/out. For AQL queues the caller supplies
 *                   QueueRptrValue/QueueWptrValue; for other types they
 *                   are set to storage inside the queue structure. On
 *                   success QueueId and Queue_DoorBell are filled in.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success. Returns
 * HSAKMT_STATUS_INVALID_PARAMETER for a NULL QueueResource, an
 * out-of-range Priority or an unknown Type; the status of
 * validate_nodeid() for a bad node; HSAKMT_STATUS_NO_MEMORY when an
 * allocation fails; HSAKMT_STATUS_ERROR when the device is unknown, the
 * create ioctl fails, or the doorbell cannot be mapped.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId,
					  HSA_QUEUE_TYPE Type,
					  HSAuint32 QueuePercentage,
					  HSA_QUEUE_PRIORITY Priority,
					  void *QueueAddress,
					  HSAuint64 QueueSizeInBytes,
					  HsaEvent *Event,
					  HsaQueueResource *QueueResource)
{
	struct kfd_ioctl_create_queue_args args = {0};
	struct device_info *dev_info;
	struct queue *q;
	HsaNodeProperties props;
	HSAKMT_STATUS result;
	uint64_t doorbell_mmap_offset;
	unsigned int doorbell_offset;
	uint32_t gpu_id;
	uint32_t cu_num, i;
	uint16_t dev_id;
	int err;

	CHECK_KFD_OPEN();

	/* QueueResource is read and written unconditionally below. */
	if (!QueueResource)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (Priority < HSA_QUEUE_PRIORITY_MINIMUM ||
	    Priority > HSA_QUEUE_PRIORITY_MAXIMUM)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	result = validate_nodeid(NodeId, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS)
		return result;

	dev_id = get_device_id_by_node(NodeId);
	dev_info = get_device_info_by_dev_id(dev_id);
	/* The lookup returns NULL for devices it does not know. */
	if (!dev_info)
		return HSAKMT_STATUS_ERROR;

	/* Translate the queue type before anything is allocated, so that an
	 * invalid type cannot leak the queue structure.
	 */
	switch (Type) {
	case HSA_QUEUE_COMPUTE:
		args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE;
		break;
	case HSA_QUEUE_SDMA:
		args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA;
		break;
	case HSA_QUEUE_COMPUTE_AQL:
		args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
		break;
	default:
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	q = allocate_exec_aligned_memory(sizeof(*q), dev_info->asic_family,
					 NodeId);
	if (!q)
		return HSAKMT_STATUS_NO_MEMORY;

	memset(q, 0, sizeof(*q));
	q->dev_info = dev_info;

	/* By default, CUs are all turned on: set the cu_mask bit of every
	 * CU. If the node properties are unavailable, or report zero SIMDs
	 * per CU (which would divide by zero), no mask is recorded.
	 */
	if (hsaKmtGetNodeProperties(NodeId, &props) ||
	    props.NumSIMDPerCU == 0) {
		q->cu_mask_count = 0;
	} else {
		cu_num = props.NumFComputeCores / props.NumSIMDPerCU;
		/* cu_mask_count counts bits. It must be multiple of 32 */
		q->cu_mask_count = ALIGN_UP_32(cu_num, 32);
		/* 1U: shifting a signed 1 into bit 31 is undefined. */
		for (i = 0; i < cu_num; i++)
			q->cu_mask[i / 32] |= (1U << (i % 32));
	}

	args.gpu_id = gpu_id;

	/* Non-AQL queues keep their read/write pointers inside the queue
	 * structure; AQL callers provide their own locations.
	 */
	if (Type != HSA_QUEUE_COMPUTE_AQL) {
		QueueResource->QueueRptrValue = (uintptr_t)&q->rptr;
		QueueResource->QueueWptrValue = (uintptr_t)&q->wptr;
	}

	err = handle_concrete_asic(q, &args, NodeId);
	if (err != HSAKMT_STATUS_SUCCESS) {
		free_queue(q);
		return (HSAKMT_STATUS)err;
	}

	args.read_pointer_address = QueueResource->QueueRptrValue;
	args.write_pointer_address = QueueResource->QueueWptrValue;
	args.ring_base_address = (uintptr_t)QueueAddress;
	args.ring_size = QueueSizeInBytes;
	args.queue_percentage = QueuePercentage;
	args.queue_priority = priority_map[Priority + 3];

	err = kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);
	if (err == -1) {
		free_queue(q);
		return HSAKMT_STATUS_ERROR;
	}

	q->queue_id = args.queue_id;

	if (IS_SOC15(dev_info->asic_family)) {
		/* On SOC15 chips, the doorbell offset within the
		 * doorbell page is included in the doorbell offset
		 * returned by KFD. This allows doorbells to be
		 * allocated per-device, independent of the
		 * per-process queue ID.
		 *
		 * The page mask is derived from the device's doorbell
		 * size rather than from doorbells[NodeId].size: the
		 * latter is still 0 until map_doorbell() below has run
		 * for this node, and it is only protected by the
		 * per-node mutex. Both describe the same page size once
		 * the doorbell is mapped.
		 */
		const HSAuint64 page_mask =
			DOORBELLS_PAGE_SIZE(dev_info->doorbell_size) - 1;

		doorbell_mmap_offset = args.doorbell_offset & ~page_mask;
		doorbell_offset = args.doorbell_offset & page_mask;
	} else {
		/* On older chips, the doorbell offset within the
		 * doorbell page is based on the queue ID.
		 */
		doorbell_mmap_offset = args.doorbell_offset;
		doorbell_offset = q->queue_id * dev_info->doorbell_size;
	}

	result = map_doorbell(NodeId, gpu_id, doorbell_mmap_offset);
	if (result != HSAKMT_STATUS_SUCCESS) {
		/* Tear down the kernel queue directly. hsaKmtDestroyQueue()
		 * expects the struct queue handle, not the kernel queue ID,
		 * and frees the structure itself on success, so calling it
		 * here and then free_queue() would be wrong either way.
		 * The ioctl result is ignored: this is best-effort cleanup
		 * on an error path that already reports failure.
		 */
		struct kfd_ioctl_destroy_queue_args destroy_args = {0};

		destroy_args.queue_id = q->queue_id;
		kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_QUEUE, &destroy_args);
		free_queue(q);
		return HSAKMT_STATUS_ERROR;
	}

	QueueResource->QueueId = PORT_VPTR_TO_UINT64(q);
	QueueResource->Queue_DoorBell = VOID_PTR_ADD(doorbells[NodeId].doorbells,
						     doorbell_offset);
	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Change ring buffer, percentage and priority of an existing queue.
 *
 * QueueId:         handle returned by hsaKmtCreateQueue().
 * QueuePercentage: passed through to KFD.
 * Priority:        thunk priority, HSA_QUEUE_PRIORITY_MINIMUM..MAXIMUM.
 * QueueAddress:    new ring buffer base address.
 * QueueSize:       new ring buffer size.
 * Event:           not used by this implementation.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for an out-of-range priority or
 * a NULL handle, HSAKMT_STATUS_ERROR when the ioctl fails, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
					  HSAuint32 QueuePercentage,
					  HSA_QUEUE_PRIORITY Priority,
					  void *QueueAddress,
					  HSAuint64 QueueSize,
					  HsaEvent *Event)
{
	struct kfd_ioctl_update_queue_args update = {0};
	struct queue *queue = PORT_UINT64_TO_VPTR(QueueId);

	CHECK_KFD_OPEN();

	if (Priority < HSA_QUEUE_PRIORITY_MINIMUM ||
	    Priority > HSA_QUEUE_PRIORITY_MAXIMUM ||
	    !queue)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	update.queue_id = (HSAuint32)queue->queue_id;
	update.ring_base_address = (uintptr_t)QueueAddress;
	update.ring_size = QueueSize;
	update.queue_percentage = QueuePercentage;
	/* priority_map is indexed by priority + 3 (range checked above) */
	update.queue_priority = priority_map[Priority + 3];

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_UPDATE_QUEUE, &update) == -1)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Destroy a queue and free its user-mode resources.
 *
 * QueueId: handle returned by hsaKmtCreateQueue().
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL handle and
 * HSAKMT_STATUS_ERROR when the destroy ioctl fails; in the latter case
 * the queue structure is kept. Returns HSAKMT_STATUS_SUCCESS after the
 * queue has been destroyed and freed.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId)
{
	CHECK_KFD_OPEN();

	struct kfd_ioctl_destroy_queue_args destroy = {0};
	struct queue *queue = PORT_UINT64_TO_VPTR(QueueId);

	if (queue == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	destroy.queue_id = queue->queue_id;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_QUEUE, &destroy) == -1)
		return HSAKMT_STATUS_ERROR;

	free_queue(queue);
	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Set the CU mask of a queue and remember it in the queue structure.
 *
 * QueueId:     handle returned by hsaKmtCreateQueue().
 * CUMaskCount: number of mask bits; must be a non-zero multiple of 32.
 * QueueCUMask: array holding CUMaskCount bits.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL handle, a NULL mask
 * or an invalid bit count, HSAKMT_STATUS_ERROR when the ioctl fails, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
					     HSAuint32 CUMaskCount,
					     HSAuint32 *QueueCUMask)
{
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
	struct kfd_ioctl_set_cu_mask_args args = {0};

	CHECK_KFD_OPEN();

	/* Reject a NULL handle like the other queue entry points do; q is
	 * dereferenced below.
	 */
	if (!q)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (CUMaskCount == 0 || !QueueCUMask || ((CUMaskCount % 32) != 0))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	args.queue_id = q->queue_id;
	args.num_cu_mask = CUMaskCount;
	args.cu_mask_ptr = (uintptr_t)QueueCUMask;

	int err = kmtIoctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);

	if (err == -1)
		return HSAKMT_STATUS_ERROR;

	/* CUMaskCount is in bits; the copy length is in bytes. */
	memcpy(q->cu_mask, QueueCUMask, CUMaskCount / 8);
	q->cu_mask_count = CUMaskCount;

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Query the wave state of a queue and report where its saved state lives.
 *
 * QueueId:   handle returned by hsaKmtCreateQueue().
 * QueueInfo: output; filled in only on success.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when either argument is NULL,
 * HSAKMT_STATUS_ERROR when the queue has no context save/restore area or
 * the ioctl fails, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetQueueInfo(
	HSA_QUEUEID QueueId,
	HsaQueueInfo *QueueInfo
)
{
	struct kfd_ioctl_get_queue_wave_state_args wave_state = {0};
	struct queue *queue = PORT_UINT64_TO_VPTR(QueueId);

	CHECK_KFD_OPEN();

	if (!QueueInfo || !queue)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (!queue->ctx_save_restore)
		return HSAKMT_STATUS_ERROR;

	wave_state.queue_id = queue->queue_id;
	wave_state.ctl_stack_address = (uintptr_t)queue->ctx_save_restore;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_QUEUE_WAVE_STATE, &wave_state) < 0)
		return HSAKMT_STATUS_ERROR;

	/* The control stack occupies the first ctl_stack_size bytes of the
	 * save area: its top is reported as the end of that region minus
	 * the bytes in use, and the user context save area starts right
	 * after it.
	 */
	QueueInfo->ControlStackTop = (void *)(wave_state.ctl_stack_address +
				queue->ctl_stack_size - wave_state.ctl_stack_used_size);
	QueueInfo->UserContextSaveArea = (void *)
				(wave_state.ctl_stack_address + queue->ctl_stack_size);
	QueueInfo->SaveAreaSizeInBytes = wave_state.save_area_used_size;
	QueueInfo->ControlStackUsedInBytes = wave_state.ctl_stack_used_size;

	QueueInfo->NumCUAssigned = queue->cu_mask_count;
	QueueInfo->CUMaskInfo = queue->cu_mask;

	QueueInfo->QueueDetailError = 0;
	QueueInfo->QueueTypeExtended = 0;

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Install a trap handler for a node.
 *
 * Node:                   topology node.
 * TrapHandlerBaseAddress: passed to KFD as tba_addr.
 * TrapHandlerSizeInBytes: not used by this implementation.
 * TrapBufferBaseAddress:  passed to KFD as tma_addr.
 * TrapBufferSizeInBytes:  not used by this implementation.
 *
 * Returns the status of validate_nodeid() for a bad node,
 * HSAKMT_STATUS_ERROR when the ioctl fails, and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandler(HSAuint32 Node,
					     void *TrapHandlerBaseAddress,
					     HSAuint64 TrapHandlerSizeInBytes,
					     void *TrapBufferBaseAddress,
					     HSAuint64 TrapBufferSizeInBytes)
{
	struct kfd_ioctl_set_trap_handler_args trap = {0};
	HSAKMT_STATUS status;
	uint32_t gpu_id;

	CHECK_KFD_OPEN();

	status = validate_nodeid(Node, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	trap.gpu_id = gpu_id;
	trap.tba_addr = (uintptr_t)TrapHandlerBaseAddress;
	trap.tma_addr = (uintptr_t)TrapBufferBaseAddress;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_SET_TRAP_HANDLER, &trap) == -1)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}