SWDEV-447545 - Fix Enable/Disable node with hipGraph
Only kernel, memcpy and memset nodes can be enabled/disabled.
If a node is disabled, it becomes an empty node.
To maintain ordering, just enqueue a marker with the respective node dependencies.
Change-Id: I710f3e88ab4e76c81f6f86a40a7dc61fd4c7e440
[ROCm/clr commit: e0e63eb04d]
This commit is contained in:
committed by
Saleel Kudchadker
parent
67543a60f1
commit
f1e5162757
@@ -629,7 +629,7 @@ hipError_t GraphExec::Run(hipStream_t stream) {
|
||||
rootCommand->release();
|
||||
}
|
||||
for (int i = 0; i < topoOrder_.size(); i++) {
|
||||
topoOrder_[i]->EnqueueCommands(stream);
|
||||
topoOrder_[i]->EnqueueCommands(reinterpret_cast<hipStream_t>(topoOrder_[i]->GetQueue()));
|
||||
}
|
||||
if (endCommand != nullptr) {
|
||||
endCommand->enqueue();
|
||||
|
||||
@@ -331,6 +331,9 @@ struct GraphNode : public hipGraphNodeDOTAttribute {
|
||||
(type_ == hipGraphNodeTypeKernel || type_ == hipGraphNodeTypeMemcpy ||
|
||||
type_ == hipGraphNodeTypeMemset)) {
|
||||
amd::Command::EventWaitList waitList;
|
||||
if (!commands_.empty()) {
|
||||
waitList = commands_[0]->eventWaitList();
|
||||
}
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList);
|
||||
command->enqueue();
|
||||
|
||||
@@ -897,15 +897,14 @@ bool VirtualGPU::dispatchGenericAqlPacket(
|
||||
packet_store_release(reinterpret_cast<uint32_t*>(aql_loc), header, rest);
|
||||
}
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_AQL,
|
||||
"HWq=0x%zx, Dispatch Header = "
|
||||
"HWq=0x%zx id=%d, Dispatch Header = "
|
||||
"0x%x (type=%d, barrier=%d, acquire=%d, release=%d), "
|
||||
"setup=%d, grid=[%zu, %zu, %zu], workgroup=[%zu, %zu, %zu], private_seg_size=%zu, "
|
||||
"group_seg_size=%zu, kernel_obj=0x%zx, kernarg_address=0x%zx, completion_signal=0x%zx "
|
||||
"rptr=%u, wptr=%u",
|
||||
gpu_queue_->base_address, header,
|
||||
gpu_queue_->base_address, gpu_queue_->id, header,
|
||||
extractAqlBits(header, HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_WIDTH_TYPE),
|
||||
extractAqlBits(header, HSA_PACKET_HEADER_BARRIER,
|
||||
HSA_PACKET_HEADER_WIDTH_BARRIER),
|
||||
extractAqlBits(header, HSA_PACKET_HEADER_BARRIER, HSA_PACKET_HEADER_WIDTH_BARRIER),
|
||||
extractAqlBits(header, HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE,
|
||||
HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE),
|
||||
extractAqlBits(header, HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE,
|
||||
@@ -920,7 +919,8 @@ bool VirtualGPU::dispatchGenericAqlPacket(
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet)->group_segment_size,
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet)->kernel_object,
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet)->kernarg_address,
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet)->completion_signal, read, index);
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(packet)->completion_signal, read,
|
||||
index);
|
||||
}
|
||||
|
||||
hsa_signal_store_screlease(gpu_queue_->doorbell_signal, index - 1);
|
||||
|
||||
Reference in a new issue
Block a user