From dee5bdc679dfeafffe511c28ef5096d71f9c9f48 Mon Sep 17 00:00:00 2001 From: Honglei Huang Date: Wed, 25 Jun 2025 14:41:37 +0800 Subject: [PATCH] rocr: replace direct libhsakmt calls with driver interfaces Replace direct hsakmt API calls with calls through the driver abstraction layer in queue management related functions. This includes: - CreateQueue/DestroyQueue operations - Queue update and GWS allocation - CU masking configuration Also update the corresponding error status types from HSAKMT_STATUS to hsa_status_t and adjust error handling accordingly. Signed-off-by: Honglei Huang --- .../core/runtime/amd_aql_queue.cpp | 45 ++++++++++--------- .../core/runtime/amd_blit_sdma.cpp | 4 +- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index dc451165c7..52635b7c79 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -262,18 +262,19 @@ AqlQueue::AqlQueue(core::SharedQueue* shared_queue, GpuAgent* agent, size_t req_ // is a KFD queue. The debugger may access the aperture addresses, queue // scratch base, and queue type. - HSAKMT_STATUS kmt_status; + hsa_status_t status; if (core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging) { queue_rsrc.ErrorReason = &exception_signal_->signal_.value; - kmt_status = HSAKMT_CALL(hsaKmtCreateQueueExt(node_id, HSA_QUEUE_COMPUTE_AQL, 100, priority_, 0, ring_buf_, - ring_buf_alloc_bytes_, queue_event(), &queue_rsrc)); + status = + agent->driver().CreateQueue(node_id, HSA_QUEUE_COMPUTE_AQL, 100, priority_, 0, ring_buf_, + ring_buf_alloc_bytes_, queue_event(), queue_rsrc); } else { - kmt_status = HSAKMT_CALL(hsaKmtCreateQueueExt(node_id, HSA_QUEUE_COMPUTE_AQL, 100, priority_, 0, ring_buf_, - ring_buf_alloc_bytes_, NULL, &queue_rsrc)); + status = agent->driver().CreateQueue(node_id, HSA_QUEUE_COMPUTE_AQL, 100, priority_, 0, + ring_buf_, ring_buf_alloc_bytes_, NULL, queue_rsrc); } - if (kmt_status != HSAKMT_STATUS_SUCCESS) + if (status != HSA_STATUS_SUCCESS) throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, - "Queue create failed at hsaKmtCreateQueue\n"); + "Queue create failed\n"); // Complete populating the doorbell signal structure. signal_.hardware_doorbell_ptr = queue_rsrc.Queue_DoorBell_aql; @@ -282,7 +283,7 @@ AqlQueue::AqlQueue(core::SharedQueue* shared_queue, GpuAgent* agent, size_t req_ amd_queue_.hsa_queue.id = this->GetQueueId(); queue_id_ = queue_rsrc.QueueId; - MAKE_NAMED_SCOPE_GUARD(QueueGuard, [&]() { HSAKMT_CALL(hsaKmtDestroyQueue(queue_id_)); }); + MAKE_NAMED_SCOPE_GUARD(QueueGuard, [&]() { agent_->driver().DestroyQueue(queue_id_); }); amd_queue_.scratch_max_use_index = UINT64_MAX; amd_queue_.alt_scratch_max_use_index = UINT64_MAX; @@ -602,23 +603,25 @@ int AqlQueue::CreateRingBufferFD(const char* ring_buf_shm_path, void AqlQueue::Suspend() { suspended_ = true; - auto err = HSAKMT_CALL(hsaKmtUpdateQueue(queue_id_, 0, priority_, ring_buf_, ring_buf_alloc_bytes_, NULL)); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtUpdateQueue failed."); + auto err = + agent_->driver().UpdateQueue(queue_id_, 0, priority_, ring_buf_, ring_buf_alloc_bytes_, NULL); + assert(err == HSA_STATUS_SUCCESS && "Update queue failed."); } void AqlQueue::Resume() { if (suspended_) { suspended_ = false; - auto err = HSAKMT_CALL(hsaKmtUpdateQueue(queue_id_, 100, priority_, ring_buf_, ring_buf_alloc_bytes_, NULL)); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtUpdateQueue failed."); + auto err = agent_->driver().UpdateQueue(queue_id_, 100, priority_, ring_buf_, + ring_buf_alloc_bytes_, NULL); + assert(err == HSA_STATUS_SUCCESS && "Update queue failed."); } } hsa_status_t AqlQueue::Inactivate() { bool active = active_.exchange(false, std::memory_order_relaxed); if (active) { - auto err = HSAKMT_CALL(hsaKmtDestroyQueue(queue_id_)); - assert(err == HSAKMT_STATUS_SUCCESS && "hsaKmtDestroyQueue failed."); + auto err = agent_->driver().DestroyQueue(queue_id_); + assert(err == HSA_STATUS_SUCCESS && "Destroy queue failed."); atomic::Fence(std::memory_order_acquire); } return HSA_STATUS_SUCCESS; @@ -630,8 +633,9 @@ hsa_status_t AqlQueue::SetPriority(HSA_QUEUE_PRIORITY priority) { } priority_ = priority; - auto err = HSAKMT_CALL(hsaKmtUpdateQueue(queue_id_, 100, priority_, ring_buf_, ring_buf_alloc_bytes_, NULL)); - return (err == HSAKMT_STATUS_SUCCESS ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_OUT_OF_RESOURCES); + auto err = agent_->driver().UpdateQueue(queue_id_, 100, priority_, ring_buf_, + ring_buf_alloc_bytes_, NULL); + return (err == HSA_STATUS_SUCCESS ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_OUT_OF_RESOURCES); } void AqlQueue::CheckScratchLimits() { @@ -1395,9 +1399,8 @@ hsa_status_t AqlQueue::SetCUMasking(uint32_t num_cu_mask_count, const uint32_t* } } - HSAKMT_STATUS ret = - HSAKMT_CALL(hsaKmtSetQueueCUMask(queue_id_, mask.size() * 32, reinterpret_cast(&mask[0]))); - if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + return agent_->driver().SetQueueCUMask(queue_id_, mask.size() * 32, + reinterpret_cast(&mask[0])); } // update current cu masking tracking. @@ -1895,8 +1898,8 @@ void AqlQueue::InitScratchSRD() { hsa_status_t AqlQueue::EnableGWS(int gws_slot_count) { uint32_t discard; - auto status = HSAKMT_CALL(hsaKmtAllocQueueGWS(queue_id_, gws_slot_count, &discard)); - if (status != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + auto status = agent_->driver().AllocQueueGWS(queue_id_, gws_slot_count, &discard); + if (status != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; amd_queue_.hsa_queue.type = HSA_QUEUE_TYPE_COOPERATIVE; return HSA_STATUS_SUCCESS; } diff --git a/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp b/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp index 99b46df41f..71b46d3447 100644 --- a/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp @@ -225,8 +225,8 @@ hsa_status_t BlitSdma: if (queue_resource_.QueueId != 0) { // Release queue resources from the kernel - auto err = HSAKMT_CALL(hsaKmtDestroyQueue(queue_resource_.QueueId)); - assert(err == HSAKMT_STATUS_SUCCESS); + auto err = agent_->driver().DestroyQueue(queue_resource_.QueueId); + assert(err == HSA_STATUS_SUCCESS); memset(&queue_resource_, 0, sizeof(queue_resource_)); }