diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h index 99dc019773..b4ffe3fb3d 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h @@ -1,5 +1,5 @@ /* - * Copyright © 2014 Advanced Micro Devices, Inc. + * Copyright © 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -336,6 +336,24 @@ hsaKmtCreateQueue( HsaQueueResource* QueueResource //OUT ); +/** + Creates a GPU queue with user-mode access rights; SdmaEngineId is forwarded to KFD as the SDMA engine ID for the queue +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtCreateQueueExt( + HSAuint32 NodeId, //IN + HSA_QUEUE_TYPE Type, //IN + HSAuint32 QueuePercentage, //IN + HSA_QUEUE_PRIORITY Priority, //IN + HSAuint32 SdmaEngineId, //IN + void* QueueAddress, //IN + HSAuint64 QueueSizeInBytes, //IN + HsaEvent* Event, //IN + HsaQueueResource* QueueResource //OUT + ); + /** Updates a queue */ diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h index 3ae8165b93..36aeba4673 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h @@ -503,6 +503,7 @@ typedef struct _HsaIoLinkProperties HSAuint32 MinimumBandwidth; // minimum interface Bandwidth in MB/s HSAuint32 MaximumBandwidth; // maximum interface Bandwidth in MB/s HSAuint32 RecTransferSize; // recommended transfer size to reach maximum bandwidth in Bytes + HSAuint32 RecSdmaEngIdMask; // recommended sdma engine IDs to reach maximum bandwidth HSA_LINKPROPERTY Flags; // override flags (may be active for specific platforms) } HsaIoLinkProperties; @@ -662,6 +663,7 @@ typedef enum _HSA_QUEUE_TYPE HSA_QUEUE_MULTIMEDIA_DECODE = 3, // reserved, for HSA multimedia decode queue HSA_QUEUE_MULTIMEDIA_ENCODE = 4, // reserved, for HSA 
multimedia encode queue HSA_QUEUE_SDMA_XGMI = 5, // XGMI optimized SDMA Queue + HSA_QUEUE_SDMA_BY_ENG_ID = 6, // Queue with specified SDMA engine ID // the following values indicate a queue type permitted to reference OS graphics // resources through the interoperation API. See [5] "HSA Graphics Interoperation diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h index 0c04283759..e132cedfb4 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h @@ -42,9 +42,10 @@ * - 1.14 - Update kfd_event_data * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl * - 1.16 - Add contiguous VRAM allocation flag + * - 1.17 - Add SDMA queue creation with target SDMA engine ID */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 16 +#define KFD_IOCTL_MINOR_VERSION 17 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -56,6 +57,7 @@ struct kfd_ioctl_get_version_args { #define KFD_IOC_QUEUE_TYPE_SDMA 0x1 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 #define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 +#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -78,6 +80,8 @@ struct kfd_ioctl_create_queue_args { __u64 ctx_save_restore_address; /* to KFD */ __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ + __u32 sdma_engine_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_destroy_queue_args { diff --git a/projects/rocr-runtime/libhsakmt/src/queues.c b/projects/rocr-runtime/libhsakmt/src/queues.c index c263a7546a..ec0a214ead 100644 --- a/projects/rocr-runtime/libhsakmt/src/queues.c +++ b/projects/rocr-runtime/libhsakmt/src/queues.c @@ -601,6 +601,24 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId, HSAuint64 QueueSizeInBytes, HsaEvent *Event, HsaQueueResource 
*QueueResource) +{ + if (Type == HSA_QUEUE_SDMA_BY_ENG_ID) /* no engine ID can be given through this entry point */ + return HSAKMT_STATUS_ERROR; + + return hsaKmtCreateQueueExt(NodeId, Type, QueuePercentage, Priority, 0, + QueueAddress, QueueSizeInBytes, Event, + QueueResource); +} + +HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueueExt(HSAuint32 NodeId, + HSA_QUEUE_TYPE Type, + HSAuint32 QueuePercentage, + HSA_QUEUE_PRIORITY Priority, + HSAuint32 SdmaEngineId, + void *QueueAddress, + HSAuint64 QueueSizeInBytes, + HsaEvent *Event, + HsaQueueResource *QueueResource) { HSAKMT_STATUS result; uint32_t gpu_id; @@ -664,6 +682,9 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId, case HSA_QUEUE_SDMA_XGMI: args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA_XGMI; break; + case HSA_QUEUE_SDMA_BY_ENG_ID: + args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID; + break; case HSA_QUEUE_COMPUTE_AQL: args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; break; @@ -688,6 +709,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId, args.ring_size = QueueSizeInBytes; args.queue_percentage = QueuePercentage; args.queue_priority = priority_map[Priority+3]; + args.sdma_engine_id = SdmaEngineId; err = kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args); @@ -730,7 +752,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId, return HSAKMT_STATUS_SUCCESS; } - HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId, HSAuint32 QueuePercentage, HSA_QUEUE_PRIORITY Priority, diff --git a/projects/rocr-runtime/libhsakmt/src/topology.c b/projects/rocr-runtime/libhsakmt/src/topology.c index 7cb8e754fc..ae98545890 100644 --- a/projects/rocr-runtime/libhsakmt/src/topology.c +++ b/projects/rocr-runtime/libhsakmt/src/topology.c @@ -1695,6 +1695,8 @@ static HSAKMT_STATUS topology_sysfs_get_iolink_props(uint32_t node_id, props->MaximumBandwidth = (uint32_t)prop_val; else if (strcmp(prop_name, "recommended_transfer_size") == 0) props->RecTransferSize = (uint32_t)prop_val; + else if (strcmp(prop_name, "recommended_sdma_engine_id_mask") == 0) + 
props->RecSdmaEngIdMask = (uint32_t)prop_val; else if (strcmp(prop_name, "flags") == 0) props->Flags.LinkProperty = (uint32_t)prop_val; }