From 4ff5ec0a026871ffa77e21843ef07a65a15db477 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Thu, 29 Feb 2024 23:37:12 +0000 Subject: [PATCH] SWDEV-301667 - Better log - Print SWq for AQL packets, this helps correlating a stream to the HWq mapped Change-Id: I610430c0872a1abc6636027c00163ec46983cd65 [ROCm/clr commit: 984c86f4073784066a8130bb4d1859c59785eb55] --- projects/clr/rocclr/device/rocm/rocdevice.cpp | 5 +++-- projects/clr/rocclr/device/rocm/rocvirtual.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index d2e414ac9b..8a3fc9c54b 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -3002,8 +3002,9 @@ hsa_queue_t* Device::acquireQueue(uint32_t queue_size_hint, bool coop_queue, } } - ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "Created hardware queue %p with size %d with priority %d," - " cooperative: %i", queue->base_address, queue_size, queue_priority, coop_queue); + ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "Created SWq=%p to map on HWq=%p with " + "size %d with priority %d, cooperative: %i", + queue, queue->base_address, queue_size, queue_priority, coop_queue); hsa_amd_profiling_set_profiler_enabled(queue, 1); if (cuMask.size() != 0 || info_.globalCUMask_.size() != 0) { diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 52ef4de469..6f2f879e9a 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -897,12 +897,12 @@ bool VirtualGPU::dispatchGenericAqlPacket( packet_store_release(reinterpret_cast(aql_loc), header, rest); } ClPrint(amd::LOG_DEBUG, amd::LOG_AQL, - "HWq=0x%zx id=%d, Dispatch Header = " + "SWq=0x%zx, HWq=0x%zx, id=%d, Dispatch Header = " "0x%x (type=%d, barrier=%d, acquire=%d, release=%d), " "setup=%d, grid=[%zu, %zu, %zu], workgroup=[%zu, %zu, %zu], private_seg_size=%zu, " "group_seg_size=%zu, kernel_obj=0x%zx, kernarg_address=0x%zx, completion_signal=0x%zx " "rptr=%u, wptr=%u", - gpu_queue_->base_address, gpu_queue_->id, header, + gpu_queue_, gpu_queue_->base_address, gpu_queue_->id, header, extractAqlBits(header, HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_WIDTH_TYPE), extractAqlBits(header, HSA_PACKET_HEADER_BARRIER, HSA_PACKET_HEADER_WIDTH_BARRIER), extractAqlBits(header, HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE, @@ -1071,10 +1071,10 @@ void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal, hsa_signal_store_screlease(gpu_queue_->doorbell_signal, index); ClPrint(amd::LOG_DEBUG, amd::LOG_AQL, - "HWq=0x%zx, BarrierAND Header = 0x%x (type=%d, barrier=%d, acquire=%d," + "SWq=0x%zx, HWq=0x%zx, id=%d, BarrierAND Header = 0x%x (type=%d, barrier=%d, acquire=%d," " release=%d), " "dep_signal=[0x%zx, 0x%zx, 0x%zx, 0x%zx, 0x%zx], completion_signal=0x%zx", - gpu_queue_->base_address, packetHeader, + gpu_queue_, gpu_queue_->base_address, gpu_queue_->id, packetHeader, extractAqlBits(packetHeader, HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_WIDTH_TYPE), extractAqlBits(packetHeader, HSA_PACKET_HEADER_BARRIER, @@ -1150,10 +1150,10 @@ void VirtualGPU::dispatchBarrierValuePacket(uint16_t packetHeader, bool resolveD hsa_signal_store_screlease(gpu_queue_->doorbell_signal, index); ClPrint(amd::LOG_DEBUG, amd::LOG_AQL, - "HWq=0x%zx, BarrierValue Header = 0x%x AmdFormat = 0x%x " + "SWq=0x%zx, HWq=0x%zx, id=%d, BarrierValue Header = 0x%x AmdFormat = 0x%x " "(type=%d, barrier=%d, acquire=%d, release=%d), " "signal=0x%zx, value = 0x%llx mask = 0x%llx cond: %s, completion_signal=0x%zx", - gpu_queue_->base_address, packetHeader, rest, + gpu_queue_, gpu_queue_->base_address, gpu_queue_->id, packetHeader, rest, extractAqlBits(packetHeader, HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_WIDTH_TYPE), extractAqlBits(packetHeader, HSA_PACKET_HEADER_BARRIER, HSA_PACKET_HEADER_WIDTH_BARRIER), extractAqlBits(packetHeader, HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE,