SWDEV-467069 - Added safety check in activity prof for accumulate command
The added safety check prevents an invalid memory access when the timestamps and kernelNames vectors differ in size. The patch also moves the addKernelName call for the accumulate command into the dispatchAqlPacket function. Change-Id: Iea0927e1253800403a1ae3f3d72de1e7d96476c3
Этот коммит содержится в:
@@ -594,8 +594,9 @@ hipError_t EnqueueGraphWithSingleList(std::vector<hip::Node>& topoOrder, hip::St
|
||||
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE && topoOrder[i]->GetType() == hipGraphNodeTypeKernel &&
|
||||
!reinterpret_cast<hip::GraphKernelNode*>(topoOrder[i])->HasHiddenHeap()) {
|
||||
if (topoOrder[i]->GetEnabled()) {
|
||||
hip_stream->vdev()->dispatchAqlPacket(topoOrder[i]->GetAqlPacket(), accumulate);
|
||||
accumulate->addKernelName(topoOrder[i]->GetKernelName());
|
||||
hip_stream->vdev()->dispatchAqlPacket(topoOrder[i]->GetAqlPacket(),
|
||||
topoOrder[i]->GetKernelName(),
|
||||
accumulate);
|
||||
}
|
||||
} else {
|
||||
topoOrder[i]->SetStream(hip_stream, graphExec);
|
||||
|
||||
@@ -1310,7 +1310,9 @@ class VirtualDevice : public amd::HeapObject {
|
||||
virtual bool isFenceDirty() const = 0;
|
||||
|
||||
//! Dispatch captured AQL packet
|
||||
virtual bool dispatchAqlPacket(uint8_t* aqlpacket, amd::AccumulateCommand* vcmd = nullptr) = 0;
|
||||
virtual bool dispatchAqlPacket(uint8_t* aqlpacket,
|
||||
const std::string& kernelName,
|
||||
amd::AccumulateCommand* vcmd = nullptr) = 0;
|
||||
|
||||
private:
|
||||
//! Disable default copy constructor
|
||||
|
||||
@@ -357,8 +357,11 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
|
||||
bool isFenceDirty() const { return false; }
|
||||
|
||||
inline bool dispatchAqlPacket(uint8_t* aqlpacket, amd::AccumulateCommand* vcmd = nullptr) {
|
||||
return false; }
|
||||
inline bool dispatchAqlPacket(uint8_t* aqlpacket, const std::string& kernelName,
|
||||
amd::AccumulateCommand* vcmd = nullptr) {
|
||||
vcmd->addKernelName(kernelName);
|
||||
return false;
|
||||
}
|
||||
|
||||
void resetFenceDirty() {}
|
||||
|
||||
|
||||
@@ -983,19 +983,26 @@ bool VirtualGPU::dispatchAqlPacket(
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
inline bool VirtualGPU::dispatchAqlPacket(uint8_t* aqlpacket, amd::AccumulateCommand* vcmd) {
|
||||
amd::ScopedLock lock(execution());
|
||||
if (vcmd != nullptr) {
|
||||
profilingBegin(*vcmd, true);
|
||||
inline bool VirtualGPU::dispatchAqlPacket(
|
||||
uint8_t* aqlpacket, const std::string& kernelName, amd::AccumulateCommand* vcmd) {
|
||||
|
||||
if (vcmd == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
vcmd->addKernelName(kernelName);
|
||||
amd::ScopedLock lock(execution());
|
||||
|
||||
profilingBegin(*vcmd, true);
|
||||
|
||||
dispatchBlockingWait();
|
||||
auto packet = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(aqlpacket);
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Graph shader name : %s",
|
||||
vcmd->getKernelNames().back().c_str());
|
||||
kernelName.c_str());
|
||||
dispatchGenericAqlPacket(packet, packet->header, packet->setup, false);
|
||||
if (vcmd != nullptr) {
|
||||
profilingEnd(*vcmd);
|
||||
}
|
||||
|
||||
profilingEnd(*vcmd);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -426,7 +426,8 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
//! Dispatches a barrier with blocking HSA signals
|
||||
void dispatchBlockingWait();
|
||||
|
||||
inline bool dispatchAqlPacket(uint8_t* aqlpacket, amd::AccumulateCommand* vcmd = nullptr);
|
||||
inline bool dispatchAqlPacket(uint8_t* aqlpacket, const std::string& kernelName,
|
||||
amd::AccumulateCommand* vcmd = nullptr);
|
||||
bool dispatchAqlPacket(hsa_kernel_dispatch_packet_t* packet, uint16_t header, uint16_t rest,
|
||||
bool blocking = true, bool capturing = false,
|
||||
const uint8_t* aqlPacket = nullptr);
|
||||
|
||||
@@ -105,13 +105,10 @@ void ReportActivity(const amd::Command& command) {
|
||||
auto timestamps = static_cast<const amd::AccumulateCommand&>(command).getTimestamps();
|
||||
std::vector<std::string> kernel_names =
|
||||
static_cast<const amd::AccumulateCommand&>(command).getKernelNames();
|
||||
for (uint32_t i = 0; i < timestamps.size(); i++) {
|
||||
for (uint32_t i = 0; i < timestamps.size() && i < kernel_names.size(); i++) {
|
||||
auto it = timestamps[i];
|
||||
record.begin_ns = it.first;
|
||||
record.end_ns = it.second;
|
||||
if (kernel_names[i].empty()) {
|
||||
LogError("kernel name cannot be empty");
|
||||
}
|
||||
record.kernel_name = kernel_names[i].c_str();
|
||||
function(ACTIVITY_DOMAIN_HIP_OPS, operation_id, &record);
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user