From bc0bd007468be656d640f8f21ce2e20b8415e91c Mon Sep 17 00:00:00 2001 From: Sean Keely Date: Tue, 11 Jul 2017 00:28:35 -0500 Subject: [PATCH] Fix queue interception in tools. 1. Correct amd::AqlQueue::ExecutePM4 to support interception. 2. Minor fixes to AqlPacket and SoftCP. 3. Minimal change to disable interception of runtime internal queues. Change-Id: I103fece2ebf9a188d27f01e61221c737405d7253 --- runtime/hsa-runtime/core/inc/queue.h | 8 +++----- .../core/runtime/amd_aql_queue.cpp | 20 +++++++++++-------- .../core/runtime/amd_gpu_agent.cpp | 16 ++++----------- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/runtime/hsa-runtime/core/inc/queue.h b/runtime/hsa-runtime/core/inc/queue.h index 5ec43bd7b9..5804449faf 100644 --- a/runtime/hsa-runtime/core/inc/queue.h +++ b/runtime/hsa-runtime/core/inc/queue.h @@ -64,15 +64,13 @@ struct AqlPacket { hsa_agent_dispatch_packet_t agent; }; - uint8_t type() { + uint8_t type() const { return ((dispatch.header >> HSA_PACKET_HEADER_TYPE) & ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)); } - bool IsValid() { - const uint8_t packet_type = dispatch.header >> HSA_PACKET_HEADER_TYPE; - return (packet_type > HSA_PACKET_TYPE_INVALID && - packet_type <= HSA_PACKET_TYPE_BARRIER_OR); + bool IsValid() const { + return (type() <= HSA_PACKET_TYPE_BARRIER_OR) & (type() != HSA_PACKET_TYPE_INVALID); } std::string string() const { diff --git a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index 9b316fd530..9d680ace43 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -775,17 +775,20 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) { // pm4_ib_buf_ is a shared resource, so mutually exclude here. ScopedAcquire lock(&pm4_ib_mutex_); - // Obtain a queue slot for a single AQL packet. - uint64_t write_idx = AddWriteIndexAcqRel(1); + // Obtain reference to any container queue. + core::Queue* queue = core::Queue::Convert(public_handle()); - while ((write_idx - LoadReadIndexRelaxed()) > public_handle()->size) { + // Obtain a queue slot for a single AQL packet. + uint64_t write_idx = queue->AddWriteIndexAcqRel(1); + + while ((write_idx - queue->LoadReadIndexRelaxed()) > queue->amd_queue_.hsa_queue.size) { os::YieldThread(); } - uint32_t slot_idx = uint32_t(write_idx % public_handle()->size); + uint32_t slot_idx = uint32_t(write_idx % queue->amd_queue_.hsa_queue.size); constexpr uint32_t slot_size_b = 0x40; uint32_t* queue_slot = - (uint32_t*)(uintptr_t(public_handle()->base_address) + (slot_idx * slot_size_b)); + (uint32_t*)(uintptr_t(queue->amd_queue_.hsa_queue.base_address) + (slot_idx * slot_size_b)); // Copy client PM4 command into IB. assert(cmd_size_b < pm4_ib_size_b_ && "PM4 exceeds IB size"); @@ -877,12 +880,13 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) { atomic::Store(&queue_slot[0], slot_data[0], std::memory_order_release); // Submit the packet slot. - core::Signal* doorbell = - core::Signal::Convert(public_handle()->doorbell_signal); + core::Signal* doorbell = core::Signal::Convert(queue->amd_queue_.hsa_queue.doorbell_signal); doorbell->StoreRelease(write_idx); // Wait for the packet to be consumed. - while (LoadReadIndexRelaxed() <= write_idx) { + // Should be switched to a signal wait when aql_pm4_ib can be used on all + // supported platforms. + while (queue->LoadReadIndexRelaxed() <= write_idx) { os::YieldThread(); } } diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 3134e87d11..52d7608c47 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -512,18 +512,10 @@ hsa_status_t GpuAgent::VisitRegion( } core::Queue* GpuAgent::CreateInterceptibleQueue() { - // Until tools runtime is merged in we need to use HSA API - // rather than GpuAgent::QueueCreate to allow interception. - hsa_queue_t* queue_handle; - hsa_status_t status = - HSA::hsa_queue_create(public_handle(), minAqlSize_, HSA_QUEUE_TYPE_MULTI, - NULL, NULL, 0, 0, &queue_handle); - - if (status != HSA_STATUS_SUCCESS) { - return NULL; - } - - return core::Queue::Convert(queue_handle); + // Disabled intercept of internal queues pending tools updates. + core::Queue* queue = nullptr; + QueueCreate(minAqlSize_, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &queue); + return queue; } core::Blit* GpuAgent::CreateBlitSdma() {