SWDEV-444988 - Fix __amd_rocclr_initHeap sync with DEBUG_CLR_GRAPH_PACKET_CAPTURE
When kernel does device side malloc, initial heap is allocated with __amd_rocclr_initHeap.
During graph launch kernel __amd_rocclr_initHeap is enqueued followed by actual kernel . So kernel will execute after initHeap kernel.
But with graph optimizations during capture initHeap gets enqueued on device null stream and actual kernel on graph launch stream.
So no proper synchronization. Switch to command creation and enqueue during launch for kernel node with hidden heap.
Change-Id: Iaf600251faef9a448853f19429023c118aa760b9
[ROCm/clr commit: 2dc6ec68a5]
Cette révision appartient à :
révisé par
Saleel Kudchadker
Parent
e8a077dc68
révision
49cbeaea21
@@ -351,7 +351,8 @@ hipError_t GraphExec::CaptureAQLPackets() {
|
||||
// arg size required for all graph kernel nodes to allocate
|
||||
for (const auto& list : parallelLists_) {
|
||||
for (auto& node : list) {
|
||||
if (node->GetType() == hipGraphNodeTypeKernel) {
|
||||
if (node->GetType() == hipGraphNodeTypeKernel &&
|
||||
!reinterpret_cast<hip::GraphKernelNode*>(node)->HasHiddenHeap()) {
|
||||
kernArgSizeForGraph += reinterpret_cast<hip::GraphKernelNode*>(node)->GetKerArgSize();
|
||||
}
|
||||
}
|
||||
@@ -373,7 +374,8 @@ hipError_t GraphExec::CaptureAQLPackets() {
|
||||
kernarg_pool_size_graph_ = kernArgSizeForGraph;
|
||||
}
|
||||
for (auto& node : topoOrder_) {
|
||||
if (node->GetType() == hipGraphNodeTypeKernel) {
|
||||
if (node->GetType() == hipGraphNodeTypeKernel &&
|
||||
!reinterpret_cast<hip::GraphKernelNode*>(node)->HasHiddenHeap()) {
|
||||
auto kernelNode = reinterpret_cast<hip::GraphKernelNode*>(node);
|
||||
// From the kernel pool allocate the kern arg size required for the current kernel node.
|
||||
address kernArgOffset = nullptr;
|
||||
@@ -560,16 +562,20 @@ hipError_t GraphExec::Run(hipStream_t stream) {
|
||||
if (parallelLists_.size() == 1 &&
|
||||
instantiateDeviceId_ == hip_stream->DeviceId()) {
|
||||
amd::AccumulateCommand* accumulate = nullptr;
|
||||
bool isLastPacketKernel = false;
|
||||
bool isLastKernelWithoutHiddenHeap =
|
||||
((topoOrder_.back()->GetType() == hipGraphNodeTypeKernel) &&
|
||||
!reinterpret_cast<hip::GraphKernelNode*>(topoOrder_.back())->HasHiddenHeap());
|
||||
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
|
||||
uint8_t* lastCapturedPacket = (topoOrder_.back()->GetType() == hipGraphNodeTypeKernel)
|
||||
? topoOrder_.back()->GetAqlPacket()
|
||||
: nullptr;
|
||||
accumulate = new amd::AccumulateCommand(*hip_stream, {}, nullptr, lastCapturedPacket);
|
||||
uint8_t* lastCapturedPacket =
|
||||
isLastKernelWithoutHiddenHeap ? topoOrder_.back()->GetAqlPacket() : nullptr;
|
||||
if (topoOrder_.back()->GetEnabled()) {
|
||||
accumulate = new amd::AccumulateCommand(*hip_stream, {}, nullptr, lastCapturedPacket);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < topoOrder_.size() - 1; i++) {
|
||||
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE && topoOrder_[i]->GetType() == hipGraphNodeTypeKernel) {
|
||||
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE && topoOrder_[i]->GetType() == hipGraphNodeTypeKernel &&
|
||||
!reinterpret_cast<hip::GraphKernelNode*>(topoOrder_[i])->HasHiddenHeap()) {
|
||||
if (topoOrder_[i]->GetEnabled()) {
|
||||
hip_stream->vdev()->dispatchAqlPacket(topoOrder_[i]->GetAqlPacket(), accumulate);
|
||||
accumulate->addKernelName(topoOrder_[i]->GetKernelName());
|
||||
@@ -583,20 +589,18 @@ hipError_t GraphExec::Run(hipStream_t stream) {
|
||||
|
||||
// If last captured packet is kernel, optimize to detect completion of last kernel
|
||||
// This saves on extra packet submitted to determine end of graph
|
||||
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE && topoOrder_.back()->GetType() == hipGraphNodeTypeKernel) {
|
||||
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE && isLastKernelWithoutHiddenHeap) {
|
||||
// Add the last kernel node name to the accumulate command
|
||||
accumulate->addKernelName(topoOrder_.back()->GetKernelName());
|
||||
accumulate->enqueue();
|
||||
accumulate->release();
|
||||
isLastPacketKernel = true;
|
||||
if (topoOrder_.back()->GetEnabled()) {
|
||||
accumulate->addKernelName(topoOrder_.back()->GetKernelName());
|
||||
}
|
||||
} else {
|
||||
topoOrder_.back()->SetStream(hip_stream, this);
|
||||
status = topoOrder_.back()->CreateCommand(topoOrder_.back()->GetQueue());
|
||||
topoOrder_.back()->EnqueueCommands(stream);
|
||||
}
|
||||
|
||||
// If last packet is not kernel, submit a marker to detect end of graph
|
||||
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE && !isLastPacketKernel) {
|
||||
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
|
||||
accumulate->enqueue();
|
||||
accumulate->release();
|
||||
}
|
||||
|
||||
@@ -785,11 +785,14 @@ class GraphKernelNode : public GraphNode {
|
||||
size_t alignedKernArgSize_; //!< Aligned size required for kernel args
|
||||
size_t kernargSegmentByteSize_; //!< Kernel arg segment byte size
|
||||
size_t kernargSegmentAlignment_; //!< Kernel arg segment alignment
|
||||
bool hasHiddenHeap_; //!< Kernel has hidden heap(device side allocation)
|
||||
|
||||
|
||||
public:
|
||||
size_t GetKerArgSize() const { return alignedKernArgSize_; }
|
||||
size_t GetKernargSegmentByteSize() const { return kernargSegmentByteSize_; }
|
||||
size_t GetKernargSegmentAlignment() const { return kernargSegmentAlignment_; }
|
||||
bool HasHiddenHeap() const { return hasHiddenHeap_; }
|
||||
void PrintAttributes(std::ostream& out, hipGraphDebugDotFlags flag) override {
|
||||
out << "[";
|
||||
out << "style";
|
||||
@@ -926,6 +929,11 @@ class GraphKernelNode : public GraphNode {
|
||||
}
|
||||
::memcpy(kernelParams_.kernelParams[i], (pNodeParams->kernelParams[i]), desc.size_);
|
||||
}
|
||||
for (uint32_t i = signature.numParameters(); i < signature.numParametersAll(); ++i) {
|
||||
if (signature.at(i).info_.oclObject_ == amd::KernelParameterDescriptor::HiddenHeap) {
|
||||
hasHiddenHeap_ = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate/assign memory if params are passed as part of 'extra'
|
||||
@@ -969,6 +977,7 @@ class GraphKernelNode : public GraphNode {
|
||||
}
|
||||
memset(&kernelAttr_, 0, sizeof(kernelAttr_));
|
||||
kernelAttrInUse_ = 0;
|
||||
hasHiddenHeap_ = false;
|
||||
}
|
||||
|
||||
~GraphKernelNode() { freeParams(); }
|
||||
|
||||
Référencer dans un nouveau ticket
Bloquer un utilisateur