From f1e516275748ff45513ea7a232e8e5b94331e68f Mon Sep 17 00:00:00 2001 From: Anusha GodavarthySurya Date: Tue, 27 Feb 2024 13:46:44 +0000 Subject: [PATCH] SWDEV-447545 - Fix Enable/Disable node with hipGraph Node can be enabled/disabled only for kernel, memcpy and memset nodes. If the node is disabled, it becomes an empty node. To maintain ordering, just enqueue a marker with the respective node dependencies. Change-Id: I710f3e88ab4e76c81f6f86a40a7dc61fd4c7e440 [ROCm/clr commit: e0e63eb04d3ca5bf0dabab1ee306a2786a36aa05] --- projects/clr/hipamd/src/hip_graph_internal.cpp | 2 +- projects/clr/hipamd/src/hip_graph_internal.hpp | 3 +++ projects/clr/rocclr/device/rocm/rocvirtual.cpp | 10 +++++----- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/projects/clr/hipamd/src/hip_graph_internal.cpp b/projects/clr/hipamd/src/hip_graph_internal.cpp index 2b1b556784..d35b810acc 100644 --- a/projects/clr/hipamd/src/hip_graph_internal.cpp +++ b/projects/clr/hipamd/src/hip_graph_internal.cpp @@ -629,7 +629,7 @@ hipError_t GraphExec::Run(hipStream_t stream) { rootCommand->release(); } for (int i = 0; i < topoOrder_.size(); i++) { - topoOrder_[i]->EnqueueCommands(stream); + topoOrder_[i]->EnqueueCommands(reinterpret_cast(topoOrder_[i]->GetQueue())); } if (endCommand != nullptr) { endCommand->enqueue(); diff --git a/projects/clr/hipamd/src/hip_graph_internal.hpp b/projects/clr/hipamd/src/hip_graph_internal.hpp index 7fee3d83b7..61ae180fa6 100644 --- a/projects/clr/hipamd/src/hip_graph_internal.hpp +++ b/projects/clr/hipamd/src/hip_graph_internal.hpp @@ -331,6 +331,9 @@ struct GraphNode : public hipGraphNodeDOTAttribute { (type_ == hipGraphNodeTypeKernel || type_ == hipGraphNodeTypeMemcpy || type_ == hipGraphNodeTypeMemset)) { amd::Command::EventWaitList waitList; + if (!commands_.empty()) { + waitList = commands_[0]->eventWaitList(); + } hip::Stream* hip_stream = hip::getStream(stream); amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList); 
command->enqueue(); diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 34a9d2c580..1d2da0a562 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -897,15 +897,14 @@ bool VirtualGPU::dispatchGenericAqlPacket( packet_store_release(reinterpret_cast(aql_loc), header, rest); } ClPrint(amd::LOG_DEBUG, amd::LOG_AQL, - "HWq=0x%zx, Dispatch Header = " + "HWq=0x%zx id=%d, Dispatch Header = " "0x%x (type=%d, barrier=%d, acquire=%d, release=%d), " "setup=%d, grid=[%zu, %zu, %zu], workgroup=[%zu, %zu, %zu], private_seg_size=%zu, " "group_seg_size=%zu, kernel_obj=0x%zx, kernarg_address=0x%zx, completion_signal=0x%zx " "rptr=%u, wptr=%u", - gpu_queue_->base_address, header, + gpu_queue_->base_address, gpu_queue_->id, header, extractAqlBits(header, HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_WIDTH_TYPE), - extractAqlBits(header, HSA_PACKET_HEADER_BARRIER, - HSA_PACKET_HEADER_WIDTH_BARRIER), + extractAqlBits(header, HSA_PACKET_HEADER_BARRIER, HSA_PACKET_HEADER_WIDTH_BARRIER), extractAqlBits(header, HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE, HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE), extractAqlBits(header, HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE, @@ -920,7 +919,8 @@ bool VirtualGPU::dispatchGenericAqlPacket( reinterpret_cast(packet)->group_segment_size, reinterpret_cast(packet)->kernel_object, reinterpret_cast(packet)->kernarg_address, - reinterpret_cast(packet)->completion_signal, read, index); + reinterpret_cast(packet)->completion_signal, read, + index); } hsa_signal_store_screlease(gpu_queue_->doorbell_signal, index - 1);