SWDEV-447545 - Fix Enable/Disable node with hipGraph

Nodes can be enabled/disabled only if they are kernel, memcpy, or memset nodes.
If a node is disabled, it becomes an empty node.
To maintain ordering, just enqueue a marker with the respective node dependencies.

Change-Id: I710f3e88ab4e76c81f6f86a40a7dc61fd4c7e440


[ROCm/clr commit: e0e63eb04d]
Этот коммит содержится в:
Anusha GodavarthySurya
2024-02-27 13:46:44 +00:00
коммит произвёл Saleel Kudchadker
родитель 67543a60f1
Коммит f1e5162757
3 изменённых файлов: 9 добавлений и 6 удалений
+1 -1
Просмотреть файл
@@ -629,7 +629,7 @@ hipError_t GraphExec::Run(hipStream_t stream) {
rootCommand->release();
}
for (int i = 0; i < topoOrder_.size(); i++) {
topoOrder_[i]->EnqueueCommands(stream);
topoOrder_[i]->EnqueueCommands(reinterpret_cast<hipStream_t>(topoOrder_[i]->GetQueue()));
}
if (endCommand != nullptr) {
endCommand->enqueue();
+3
Просмотреть файл
@@ -331,6 +331,9 @@ struct GraphNode : public hipGraphNodeDOTAttribute {
(type_ == hipGraphNodeTypeKernel || type_ == hipGraphNodeTypeMemcpy ||
type_ == hipGraphNodeTypeMemset)) {
amd::Command::EventWaitList waitList;
if (!commands_.empty()) {
waitList = commands_[0]->eventWaitList();
}
hip::Stream* hip_stream = hip::getStream(stream);
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList);
command->enqueue();
+5 -5
Просмотреть файл
@@ -897,15 +897,14 @@ bool VirtualGPU::dispatchGenericAqlPacket(
packet_store_release(reinterpret_cast<uint32_t*>(aql_loc), header, rest);
}
ClPrint(amd::LOG_DEBUG, amd::LOG_AQL,
"HWq=0x%zx, Dispatch Header = "
"HWq=0x%zx id=%d, Dispatch Header = "
"0x%x (type=%d, barrier=%d, acquire=%d, release=%d), "
"setup=%d, grid=[%zu, %zu, %zu], workgroup=[%zu, %zu, %zu], private_seg_size=%zu, "
"group_seg_size=%zu, kernel_obj=0x%zx, kernarg_address=0x%zx, completion_signal=0x%zx "
"rptr=%u, wptr=%u",
gpu_queue_->base_address, header,
gpu_queue_->base_address, gpu_queue_->id, header,
extractAqlBits(header, HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_WIDTH_TYPE),
extractAqlBits(header, HSA_PACKET_HEADER_BARRIER,
HSA_PACKET_HEADER_WIDTH_BARRIER),
extractAqlBits(header, HSA_PACKET_HEADER_BARRIER, HSA_PACKET_HEADER_WIDTH_BARRIER),
extractAqlBits(header, HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE,
HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE),
extractAqlBits(header, HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE,
@@ -920,7 +919,8 @@ bool VirtualGPU::dispatchGenericAqlPacket(
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet)->group_segment_size,
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet)->kernel_object,
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet)->kernarg_address,
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet)->completion_signal, read, index);
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet)->completion_signal, read,
index);
}
hsa_signal_store_screlease(gpu_queue_->doorbell_signal, index - 1);