From 2412b277265e5706c819eaa550b340da8bc1ed3b Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 13 Aug 2024 14:49:43 -0400 Subject: [PATCH] libhsakmt: Extend thunk queue creation with recommended sdma engines Extend the current Thunk implementation of queue creation to target specific SDMA engine IDs. Also expose the new recommended SDMA engines per IO link from the KFD sysfs. Change-Id: I51f9a0d83c0f1fc4d5dc837f879a7ae332e7d7e9 [ROCm/ROCR-Runtime commit: 2f588a24065f41c208c3701945e20be746d8faf7] --- .../libhsakmt/include/hsakmt/hsakmt.h | 20 +++++++++++++++- .../libhsakmt/include/hsakmt/hsakmttypes.h | 2 ++ .../include/hsakmt/linux/kfd_ioctl.h | 6 ++++- projects/rocr-runtime/libhsakmt/src/queues.c | 23 ++++++++++++++++++- .../rocr-runtime/libhsakmt/src/topology.c | 2 ++ 5 files changed, 50 insertions(+), 3 deletions(-) diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h index 99dc019773..b4ffe3fb3d 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h @@ -1,5 +1,5 @@ /* - * Copyright © 2014 Advanced Micro Devices, Inc. + * Copyright © 2024 Advanced Micro Devices, Inc. 
* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -336,6 +336,24 @@ hsaKmtCreateQueue( HsaQueueResource* QueueResource //OUT ); +/** + Creates a GPU queue with user-mode access rights +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtCreateQueueExt( + HSAuint32 NodeId, //IN + HSA_QUEUE_TYPE Type, //IN + HSAuint32 QueuePercentage, //IN + HSA_QUEUE_PRIORITY Priority, //IN + HSAuint32 SdmaEngineId, //IN + void* QueueAddress, //IN + HSAuint64 QueueSizeInBytes, //IN + HsaEvent* Event, //IN + HsaQueueResource* QueueResource //OUT + ); + /** Updates a queue */ diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h index 3ae8165b93..36aeba4673 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h @@ -503,6 +503,7 @@ typedef struct _HsaIoLinkProperties HSAuint32 MinimumBandwidth; // minimum interface Bandwidth in MB/s HSAuint32 MaximumBandwidth; // maximum interface Bandwidth in MB/s HSAuint32 RecTransferSize; // recommended transfer size to reach maximum bandwidth in Bytes + HSAuint32 RecSdmaEngIdMask; // recommended sdma engine IDs to reach maximum bandwidth HSA_LINKPROPERTY Flags; // override flags (may be active for specific platforms) } HsaIoLinkProperties; @@ -662,6 +663,7 @@ typedef enum _HSA_QUEUE_TYPE HSA_QUEUE_MULTIMEDIA_DECODE = 3, // reserved, for HSA multimedia decode queue HSA_QUEUE_MULTIMEDIA_ENCODE = 4, // reserved, for HSA multimedia encode queue HSA_QUEUE_SDMA_XGMI = 5, // XGMI optimized SDMA Queue + HSA_QUEUE_SDMA_BY_ENG_ID = 6, // Queue with specified SDMA engine ID // the following values indicate a queue type permitted to reference OS graphics // resources through the interoperation API. 
See [5] "HSA Graphics Interoperation diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h index 0c04283759..e132cedfb4 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h @@ -42,9 +42,10 @@ * - 1.14 - Update kfd_event_data * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl * - 1.16 - Add contiguous VRAM allocation flag + * - 1.17 - Add SDMA queue creation with target SDMA engine ID */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 16 +#define KFD_IOCTL_MINOR_VERSION 17 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -56,6 +57,7 @@ struct kfd_ioctl_get_version_args { #define KFD_IOC_QUEUE_TYPE_SDMA 0x1 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 #define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 +#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -78,6 +80,8 @@ struct kfd_ioctl_create_queue_args { __u64 ctx_save_restore_address; /* to KFD */ __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ + __u32 sdma_engine_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_destroy_queue_args { diff --git a/projects/rocr-runtime/libhsakmt/src/queues.c b/projects/rocr-runtime/libhsakmt/src/queues.c index c263a7546a..ec0a214ead 100644 --- a/projects/rocr-runtime/libhsakmt/src/queues.c +++ b/projects/rocr-runtime/libhsakmt/src/queues.c @@ -601,6 +601,24 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId, HSAuint64 QueueSizeInBytes, HsaEvent *Event, HsaQueueResource *QueueResource) +{ + if (Type >= HSA_QUEUE_SDMA_BY_ENG_ID) + return HSAKMT_STATUS_ERROR; + + return hsaKmtCreateQueueExt(NodeId, Type, QueuePercentage, Priority, 0, + QueueAddress, QueueSizeInBytes, Event, + QueueResource); +} + +HSAKMT_STATUS HSAKMTAPI 
hsaKmtCreateQueueExt(HSAuint32 NodeId, + HSA_QUEUE_TYPE Type, + HSAuint32 QueuePercentage, + HSA_QUEUE_PRIORITY Priority, + HSAuint32 SdmaEngineId, + void *QueueAddress, + HSAuint64 QueueSizeInBytes, + HsaEvent *Event, + HsaQueueResource *QueueResource) { HSAKMT_STATUS result; uint32_t gpu_id; @@ -664,6 +682,9 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId, case HSA_QUEUE_SDMA_XGMI: args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA_XGMI; break; + case HSA_QUEUE_SDMA_BY_ENG_ID: + args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID; + break; case HSA_QUEUE_COMPUTE_AQL: args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; break; @@ -688,6 +709,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId, args.ring_size = QueueSizeInBytes; args.queue_percentage = QueuePercentage; args.queue_priority = priority_map[Priority+3]; + args.sdma_engine_id = SdmaEngineId; err = kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args); @@ -730,7 +752,6 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId, return HSAKMT_STATUS_SUCCESS; } - HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId, HSAuint32 QueuePercentage, HSA_QUEUE_PRIORITY Priority, diff --git a/projects/rocr-runtime/libhsakmt/src/topology.c b/projects/rocr-runtime/libhsakmt/src/topology.c index 7cb8e754fc..ae98545890 100644 --- a/projects/rocr-runtime/libhsakmt/src/topology.c +++ b/projects/rocr-runtime/libhsakmt/src/topology.c @@ -1695,6 +1695,8 @@ static HSAKMT_STATUS topology_sysfs_get_iolink_props(uint32_t node_id, props->MaximumBandwidth = (uint32_t)prop_val; else if (strcmp(prop_name, "recommended_transfer_size") == 0) props->RecTransferSize = (uint32_t)prop_val; + else if (strcmp(prop_name, "recommended_sdma_engine_id_mask") == 0) + props->RecSdmaEngIdMask = (uint32_t)prop_val; else if (strcmp(prop_name, "flags") == 0) props->Flags.LinkProperty = (uint32_t)prop_val; }