diff --git a/projects/clr/hipamd/src/hip_graph_internal.cpp b/projects/clr/hipamd/src/hip_graph_internal.cpp index 5da0d6ba4c..6ea32142b9 100644 --- a/projects/clr/hipamd/src/hip_graph_internal.cpp +++ b/projects/clr/hipamd/src/hip_graph_internal.cpp @@ -594,8 +594,9 @@ hipError_t EnqueueGraphWithSingleList(std::vector& topoOrder, hip::St if (DEBUG_CLR_GRAPH_PACKET_CAPTURE && topoOrder[i]->GetType() == hipGraphNodeTypeKernel && !reinterpret_cast(topoOrder[i])->HasHiddenHeap()) { if (topoOrder[i]->GetEnabled()) { - hip_stream->vdev()->dispatchAqlPacket(topoOrder[i]->GetAqlPacket(), accumulate); - accumulate->addKernelName(topoOrder[i]->GetKernelName()); + hip_stream->vdev()->dispatchAqlPacket(topoOrder[i]->GetAqlPacket(), + topoOrder[i]->GetKernelName(), + accumulate); } } else { topoOrder[i]->SetStream(hip_stream, graphExec); diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 8c8cab605f..6989a1cb5e 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -1310,7 +1310,9 @@ class VirtualDevice : public amd::HeapObject { virtual bool isFenceDirty() const = 0; //! Dispatch captured AQL packet - virtual bool dispatchAqlPacket(uint8_t* aqlpacket, amd::AccumulateCommand* vcmd = nullptr) = 0; + virtual bool dispatchAqlPacket(uint8_t* aqlpacket, + const std::string& kernelName, + amd::AccumulateCommand* vcmd = nullptr) = 0; private: //! 
Disable default copy constructor diff --git a/projects/clr/rocclr/device/pal/palvirtual.hpp b/projects/clr/rocclr/device/pal/palvirtual.hpp index def6cbbee5..0670ace213 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/device/pal/palvirtual.hpp @@ -357,8 +357,15 @@ class VirtualGPU : public device::VirtualDevice { bool isFenceDirty() const { return false; } - inline bool dispatchAqlPacket(uint8_t* aqlpacket, amd::AccumulateCommand* vcmd = nullptr) { - return false; } + //! Records kernelName on vcmd when one is supplied; performs no dispatch here and always returns false. + inline bool dispatchAqlPacket(uint8_t* aqlpacket, const std::string& kernelName, + amd::AccumulateCommand* vcmd = nullptr) { + // vcmd defaults to nullptr, so it must be checked before it is dereferenced. + if (vcmd != nullptr) { + vcmd->addKernelName(kernelName); + } + return false; + } void resetFenceDirty() {} diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index b12f1ae220..34878c2bbb 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -983,19 +983,26 @@ bool VirtualGPU::dispatchAqlPacket( } // ================================================================================================ -inline bool VirtualGPU::dispatchAqlPacket(uint8_t* aqlpacket, amd::AccumulateCommand* vcmd) { - amd::ScopedLock lock(execution()); - if (vcmd != nullptr) { - profilingBegin(*vcmd, true); +inline bool VirtualGPU::dispatchAqlPacket( + uint8_t* aqlpacket, const std::string& kernelName, amd::AccumulateCommand* vcmd) { + + if (vcmd == nullptr) { + return false; } + + vcmd->addKernelName(kernelName); + amd::ScopedLock lock(execution()); + + profilingBegin(*vcmd, true); + dispatchBlockingWait(); auto packet = reinterpret_cast(aqlpacket); ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Graph shader name : %s", - vcmd->getKernelNames().back().c_str()); + kernelName.c_str()); dispatchGenericAqlPacket(packet, packet->header, packet->setup, false); - if (vcmd != nullptr) { - profilingEnd(*vcmd); - } + + profilingEnd(*vcmd); + return true; } diff --git
a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp index a48c179caa..abb67689bd 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp @@ -426,7 +426,8 @@ class VirtualGPU : public device::VirtualDevice { //! Dispatches a barrier with blocking HSA signals void dispatchBlockingWait(); - inline bool dispatchAqlPacket(uint8_t* aqlpacket, amd::AccumulateCommand* vcmd = nullptr); + inline bool dispatchAqlPacket(uint8_t* aqlpacket, const std::string& kernelName, + amd::AccumulateCommand* vcmd = nullptr); bool dispatchAqlPacket(hsa_kernel_dispatch_packet_t* packet, uint16_t header, uint16_t rest, bool blocking = true, bool capturing = false, const uint8_t* aqlPacket = nullptr); diff --git a/projects/clr/rocclr/platform/activity.cpp b/projects/clr/rocclr/platform/activity.cpp index 184969803a..a12110a015 100644 --- a/projects/clr/rocclr/platform/activity.cpp +++ b/projects/clr/rocclr/platform/activity.cpp @@ -105,13 +105,10 @@ void ReportActivity(const amd::Command& command) { auto timestamps = static_cast(command).getTimestamps(); std::vector kernel_names = static_cast(command).getKernelNames(); - for (uint32_t i = 0; i < timestamps.size(); i++) { + for (uint32_t i = 0; i < timestamps.size() && i < kernel_names.size(); i++) { auto it = timestamps[i]; record.begin_ns = it.first; record.end_ns = it.second; - if (kernel_names[i].empty()) { - LogError("kernel name cannot be empty"); - } record.kernel_name = kernel_names[i].c_str(); function(ACTIVITY_DOMAIN_HIP_OPS, operation_id, &record); }