From 44b940e03379e5ebce9eb34fc1fc2e15bf4a831c Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 23 Feb 2023 18:43:30 +0800 Subject: [PATCH] Switch to completion signal wait for amd_aql_pm4_ib processing Wait on completion signal for amd_aql_pm4_ib processing on ASICs with gfx version >= 9. Signed-off-by: Lang Yu Change-Id: Ia704d9cc5b2535dcf8564a30f694262b113f77a2 [ROCm/ROCR-Runtime commit: aec7200cb2e276be217bf9c9428d6b7fcb4a3602] --- .../core/runtime/amd_aql_queue.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index a9a2cac75b..b9d49c3c40 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -1191,6 +1191,8 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) { // To respect multi-producer semantics, first buffer commands for the queue slot. constexpr uint32_t slot_size_dw = uint32_t(slot_size_b / sizeof(uint32_t)); uint32_t slot_data[slot_size_dw]; + hsa_signal_t signal = {0}; + hsa_status_t err; if (agent_->isa()->GetMajorVersion() <= 8) { // Construct a set of PM4 to fit inside the AQL packet slot. @@ -1243,6 +1245,9 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) { constexpr uint32_t AMD_AQL_FORMAT_PM4_IB = 0x1; + err = hsa_signal_create(1, 0, NULL, &signal); + assert(err == HSA_STATUS_SUCCESS); + amd_aql_pm4_ib aql_pm4_ib{}; aql_pm4_ib.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE; aql_pm4_ib.ven_hdr = AMD_AQL_FORMAT_PM4_IB; @@ -1251,6 +1256,7 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) { aql_pm4_ib.ib_jump_cmd[2] = ib_jump_cmd[2]; aql_pm4_ib.ib_jump_cmd[3] = ib_jump_cmd[3]; aql_pm4_ib.dw_cnt_remain = 0xA; + aql_pm4_ib.completion_signal = signal; memcpy(slot_data, &aql_pm4_ib, sizeof(aql_pm4_ib)); } else { @@ -1268,10 +1274,15 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) { doorbell->StoreRelease(write_idx); // Wait for the packet to be consumed. - // Should be switched to a signal wait when aql_pm4_ib can be used on all - // supported platforms. - while (queue->LoadReadIndexRelaxed() <= write_idx) { - os::YieldThread(); + if (agent_->isa()->GetMajorVersion() <= 8) { + while (queue->LoadReadIndexRelaxed() <= write_idx) + os::YieldThread(); + } else { + hsa_signal_value_t ret; + ret = hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, + (uint64_t)-1, HSA_WAIT_STATE_ACTIVE); + err = hsa_signal_destroy(signal); + assert(ret == 0 && err == HSA_STATUS_SUCCESS); } }